Bump bundled libarchive version to 3.5.2

- Update bundled libarchive version used on Windows/Mac
- Enable requested zstd support while we are at it. Closes #211
Floris Bos 2021-12-09 12:22:14 +01:00
parent 03e083b4f3
commit 67618a2eac
1869 changed files with 166685 additions and 9489 deletions


@@ -0,0 +1,39 @@
# local binary (Makefile)
zstd
zstd32
zstd4
zstd-compress
zstd-decompress
zstd-frugal
zstd-small
zstd-nolegacy
zstd-dictBuilder
zstd-dll
# Object files
*.o
*.ko
default.profraw
have_zlib
# Executables
*.exe
*.out
*.app
# Default result files
dictionary
grillResults.txt
_*
tmp*
*.zst
result
out
# fuzzer
afl
# Misc files
*.bat
!windres/generate_res.bat
dirTest*

dependencies/zstd-1.5.0/programs/BUCK vendored Normal file

@@ -0,0 +1,44 @@
cxx_binary(
name='zstd',
headers=glob(['*.h'], excludes=['datagen.h', 'platform.h', 'util.h']),
srcs=glob(['*.c'], excludes=['datagen.c']),
deps=[
':datagen',
':util',
'//lib:zstd',
'//lib:zdict',
'//lib:mem',
'//lib:xxhash',
],
preprocessor_flags=[
'-DZSTD_GZCOMPRESS',
'-DZSTD_GZDECOMPRESS',
'-DZSTD_LZMACOMPRESS',
'-DZSTD_LZMADECOMPRESS',
'-DZSTD_LZ4COMPRESS',
'-DZSTD_LZ4DECOMPRESS',
],
linker_flags=[
'-lz',
'-llzma',
'-llz4',
],
)
cxx_library(
name='datagen',
visibility=['PUBLIC'],
header_namespace='',
exported_headers=['datagen.h'],
srcs=['datagen.c'],
deps=['//lib:mem'],
)
cxx_library(
name='util',
visibility=['PUBLIC'],
header_namespace='',
exported_headers=['util.h', 'platform.h'],
deps=['//lib:mem'],
)


@@ -0,0 +1,511 @@
# ################################################################
# Copyright (c) Yann Collet, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
# LICENSE file in the root directory of this source tree) and the GPLv2 (found
# in the COPYING file in the root directory of this source tree).
# You may select, at your option, one of the above-listed licenses.
# ##########################################################################
# zstd : Command Line Utility, supporting gzip-like arguments
# zstd32 : Same as zstd, but forced to compile in 32-bit mode
# zstd-nolegacy : zstd without support for decompression of legacy versions
# zstd-small : minimal zstd without dictionary builder and benchmark
# zstd-compress : compressor-only version of zstd
# zstd-decompress : decompressor-only version of zstd
# ##########################################################################
.PHONY: default
default: zstd-release
# silent mode by default; verbose can be triggered by V=1 or VERBOSE=1
$(V)$(VERBOSE).SILENT:
ZSTDDIR := ../lib
# Version numbers
LIBVER_SRC := $(ZSTDDIR)/zstd.h
LIBVER_MAJOR_SCRIPT:=`sed -n '/define ZSTD_VERSION_MAJOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < $(LIBVER_SRC)`
LIBVER_MINOR_SCRIPT:=`sed -n '/define ZSTD_VERSION_MINOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < $(LIBVER_SRC)`
LIBVER_PATCH_SCRIPT:=`sed -n '/define ZSTD_VERSION_RELEASE/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < $(LIBVER_SRC)`
LIBVER_SCRIPT:= $(LIBVER_MAJOR_SCRIPT).$(LIBVER_MINOR_SCRIPT).$(LIBVER_PATCH_SCRIPT)
LIBVER_MAJOR := $(shell echo $(LIBVER_MAJOR_SCRIPT))
LIBVER_MINOR := $(shell echo $(LIBVER_MINOR_SCRIPT))
LIBVER_PATCH := $(shell echo $(LIBVER_PATCH_SCRIPT))
LIBVER := $(shell echo $(LIBVER_SCRIPT))
ZSTD_VERSION = $(LIBVER)
HAVE_COLORNEVER = $(shell echo a | grep --color=never a > /dev/null 2> /dev/null && echo 1 || echo 0)
GREP_OPTIONS ?=
ifeq ($(HAVE_COLORNEVER), 1)
GREP_OPTIONS += --color=never
endif
GREP = grep $(GREP_OPTIONS)
ifeq ($(shell $(CC) -v 2>&1 | $(GREP) -c "gcc version "), 1)
ALIGN_LOOP = -falign-loops=32
else
ALIGN_LOOP =
endif
DEBUGLEVEL ?= 0
CPPFLAGS += -DXXH_NAMESPACE=ZSTD_ -DDEBUGLEVEL=$(DEBUGLEVEL)
ifeq ($(OS),Windows_NT) # MinGW assumed
CPPFLAGS += -D__USE_MINGW_ANSI_STDIO # compatibility with %zu formatting
endif
CFLAGS ?= -O3
DEBUGFLAGS+=-Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
-Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \
-Wstrict-prototypes -Wundef -Wpointer-arith \
-Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \
-Wredundant-decls -Wmissing-prototypes -Wc++-compat
CFLAGS += $(DEBUGFLAGS)
CPPFLAGS += $(MOREFLAGS)
LDFLAGS += $(MOREFLAGS)
FLAGS = $(CFLAGS) $(CPPFLAGS) $(LDFLAGS)
ZSTDLIB_COMMON := $(ZSTDDIR)/common
ZSTDLIB_COMPRESS := $(ZSTDDIR)/compress
ZSTDLIB_DECOMPRESS := $(ZSTDDIR)/decompress
ZDICT_DIR := $(ZSTDDIR)/dictBuilder
ZSTDLEGACY_DIR := $(ZSTDDIR)/legacy
vpath %.c $(ZSTDLIB_COMMON) $(ZSTDLIB_COMPRESS) $(ZSTDLIB_DECOMPRESS) $(ZDICT_DIR) $(ZSTDLEGACY_DIR)
ZSTDLIB_COMMON_C := $(wildcard $(ZSTDLIB_COMMON)/*.c)
ZSTDLIB_COMPRESS_C := $(wildcard $(ZSTDLIB_COMPRESS)/*.c)
ZSTDLIB_DECOMPRESS_C := $(wildcard $(ZSTDLIB_DECOMPRESS)/*.c)
ZSTDLIB_CORE_SRC := $(ZSTDLIB_DECOMPRESS_C) $(ZSTDLIB_COMMON_C) $(ZSTDLIB_COMPRESS_C)
ZDICT_SRC := $(wildcard $(ZDICT_DIR)/*.c)
ZSTD_LEGACY_SUPPORT ?= 5
ZSTDLEGACY_SRC :=
ifneq ($(ZSTD_LEGACY_SUPPORT), 0)
ifeq ($(shell test $(ZSTD_LEGACY_SUPPORT) -lt 8; echo $$?), 0)
ZSTDLEGACY_SRC += $(shell ls $(ZSTDLEGACY_DIR)/*.c | $(GREP) 'v0[$(ZSTD_LEGACY_SUPPORT)-7]')
endif
endif
# Sort files in alphabetical order for reproducible builds
ZSTDLIB_FULL_SRC = $(sort $(ZSTDLIB_CORE_SRC) $(ZSTDLEGACY_SRC) $(ZDICT_SRC))
ZSTDLIB_LOCAL_SRC = $(notdir $(ZSTDLIB_FULL_SRC))
ZSTDLIB_LOCAL_OBJ := $(ZSTDLIB_LOCAL_SRC:.c=.o)
ZSTD_CLI_SRC := $(wildcard *.c)
ZSTD_CLI_OBJ := $(ZSTD_CLI_SRC:.c=.o)
ZSTD_ALL_SRC = $(ZSTDLIB_LOCAL_SRC) $(ZSTD_CLI_SRC)
ZSTD_ALL_OBJ := $(ZSTD_ALL_SRC:.c=.o)
UNAME := $(shell uname)
ifndef BUILD_DIR
ifeq ($(UNAME), Darwin)
ifeq ($(shell md5 < /dev/null > /dev/null; echo $$?), 0)
HASH ?= md5
endif
else ifeq ($(UNAME), FreeBSD)
HASH ?= gmd5sum
else ifeq ($(UNAME), NetBSD)
HASH ?= md5 -n
else ifeq ($(UNAME), OpenBSD)
HASH ?= md5
endif
HASH ?= md5sum
HAVE_HASH :=$(shell echo 1 | $(HASH) > /dev/null && echo 1 || echo 0)
HASH_DIR = conf_$(shell echo $(CC) $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $(LDLIBS) $(ZSTD_FILES) | $(HASH) | cut -f 1 -d " ")
ifeq ($(HAVE_HASH),0)
$(info warning : could not find HASH ($(HASH)), needed to differentiate builds using different flags)
BUILD_DIR := obj/generic_noconf
endif
endif # BUILD_DIR
# Define *.exe as extension for Windows systems
ifneq (,$(filter Windows%,$(OS)))
EXT =.exe
RES64_FILE = windres/zstd64.res
RES32_FILE = windres/zstd32.res
ifneq (,$(filter x86_64%,$(shell $(CC) -dumpmachine)))
RES_FILE = $(RES64_FILE)
else
RES_FILE = $(RES32_FILE)
endif
else
EXT =
endif
VOID = /dev/null
# Make 4.3 doesn't support '\#' anymore (https://lwn.net/Articles/810071/)
NUM_SYMBOL := \#
# thread detection
NO_THREAD_MSG := ==> no threads, building without multithreading support
HAVE_PTHREAD := $(shell printf '$(NUM_SYMBOL)include <pthread.h>\nint main(void) { return 0; }' > have_pthread.c && $(CC) $(FLAGS) -o have_pthread$(EXT) have_pthread.c -pthread 2> $(VOID) && rm have_pthread$(EXT) && echo 1 || echo 0; rm have_pthread.c)
HAVE_THREAD := $(shell [ "$(HAVE_PTHREAD)" -eq "1" -o -n "$(filter Windows%,$(OS))" ] && echo 1 || echo 0)
ifeq ($(HAVE_THREAD), 1)
THREAD_MSG := ==> building with threading support
THREAD_CPP := -DZSTD_MULTITHREAD
THREAD_LD := -pthread
else
THREAD_MSG := $(NO_THREAD_MSG)
endif
# zlib detection
NO_ZLIB_MSG := ==> no zlib, building zstd without .gz support
HAVE_ZLIB := $(shell printf '$(NUM_SYMBOL)include <zlib.h>\nint main(void) { return 0; }' > have_zlib.c && $(CC) $(FLAGS) -o have_zlib$(EXT) have_zlib.c -lz 2> $(VOID) && rm have_zlib$(EXT) && echo 1 || echo 0; rm have_zlib.c)
ifeq ($(HAVE_ZLIB), 1)
ZLIB_MSG := ==> building zstd with .gz compression support
ZLIBCPP = -DZSTD_GZCOMPRESS -DZSTD_GZDECOMPRESS
ZLIBLD = -lz
else
ZLIB_MSG := $(NO_ZLIB_MSG)
endif
# lzma detection
NO_LZMA_MSG := ==> no liblzma, building zstd without .xz/.lzma support
HAVE_LZMA := $(shell printf '$(NUM_SYMBOL)include <lzma.h>\nint main(void) { return 0; }' > have_lzma.c && $(CC) $(FLAGS) -o have_lzma$(EXT) have_lzma.c -llzma 2> $(VOID) && rm have_lzma$(EXT) && echo 1 || echo 0; rm have_lzma.c)
ifeq ($(HAVE_LZMA), 1)
LZMA_MSG := ==> building zstd with .xz/.lzma compression support
LZMACPP = -DZSTD_LZMACOMPRESS -DZSTD_LZMADECOMPRESS
LZMALD = -llzma
else
LZMA_MSG := $(NO_LZMA_MSG)
endif
# lz4 detection
NO_LZ4_MSG := ==> no liblz4, building zstd without .lz4 support
HAVE_LZ4 := $(shell printf '$(NUM_SYMBOL)include <lz4frame.h>\n$(NUM_SYMBOL)include <lz4.h>\nint main(void) { return 0; }' > have_lz4.c && $(CC) $(FLAGS) -o have_lz4$(EXT) have_lz4.c -llz4 2> $(VOID) && rm have_lz4$(EXT) && echo 1 || echo 0; rm have_lz4.c)
ifeq ($(HAVE_LZ4), 1)
LZ4_MSG := ==> building zstd with .lz4 compression support
LZ4CPP = -DZSTD_LZ4COMPRESS -DZSTD_LZ4DECOMPRESS
LZ4LD = -llz4
else
LZ4_MSG := $(NO_LZ4_MSG)
endif
# explicit backtrace enable/disable for Linux & Darwin
ifeq ($(BACKTRACE), 0)
DEBUGFLAGS += -DBACKTRACE_ENABLE=0
endif
ifeq (,$(filter Windows%, $(OS)))
ifeq ($(BACKTRACE), 1)
DEBUGFLAGS += -DBACKTRACE_ENABLE=1
DEBUGFLAGS_LD += -rdynamic
endif
endif
SET_CACHE_DIRECTORY = \
+$(MAKE) --no-print-directory $@ \
BUILD_DIR=obj/$(HASH_DIR) \
CPPFLAGS="$(CPPFLAGS)" \
CFLAGS="$(CFLAGS)" \
LDFLAGS="$(LDFLAGS)" \
LDLIBS="$(LDLIBS)" \
ZSTD_ALL_SRC="$(ZSTD_ALL_SRC)"
.PHONY: all
all: zstd
.PHONY: allVariants
allVariants: zstd zstd-compress zstd-decompress zstd-small zstd-nolegacy zstd-dictBuilder
.PHONY: zstd # must always be run
zstd : CPPFLAGS += $(THREAD_CPP) $(ZLIBCPP) $(LZMACPP) $(LZ4CPP)
zstd : LDFLAGS += $(THREAD_LD) $(DEBUGFLAGS_LD)
zstd : LDLIBS += $(ZLIBLD) $(LZMALD) $(LZ4LD)
zstd : CPPFLAGS += -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT)
ifneq (,$(filter Windows%,$(OS)))
zstd : $(RES_FILE)
endif
ifndef BUILD_DIR
# generate BUILD_DIR from flags
zstd:
$(SET_CACHE_DIRECTORY)
else
# BUILD_DIR is defined
ZSTD_OBJ := $(addprefix $(BUILD_DIR)/, $(ZSTD_ALL_OBJ))
$(BUILD_DIR)/zstd : $(ZSTD_OBJ)
@echo "$(THREAD_MSG)"
@echo "$(ZLIB_MSG)"
@echo "$(LZMA_MSG)"
@echo "$(LZ4_MSG)"
@echo LINK $@
$(CC) $(FLAGS) $^ $(LDLIBS) -o $@$(EXT)
ifeq ($(HAVE_HASH),1)
SRCBIN_HASH = $(shell cat $(BUILD_DIR)/zstd 2> $(VOID) | $(HASH) | cut -f 1 -d " ")
DSTBIN_HASH = $(shell cat zstd 2> $(VOID) | $(HASH) | cut -f 1 -d " ")
BIN_ISDIFFERENT = $(if $(filter $(SRCBIN_HASH),$(DSTBIN_HASH)),0,1)
else
BIN_ISDIFFERENT = 1
endif
zstd : $(BUILD_DIR)/zstd
if [ $(BIN_ISDIFFERENT) -eq 1 ]; then \
cp -f $< $@; \
echo zstd build completed; \
else \
echo zstd already built; \
fi
endif # BUILD_DIR
.PHONY: zstd-release
zstd-release: DEBUGFLAGS := -DBACKTRACE_ENABLE=0
zstd-release: DEBUGFLAGS_LD :=
zstd-release: zstd
zstd32 : CPPFLAGS += $(THREAD_CPP)
zstd32 : LDFLAGS += $(THREAD_LD)
zstd32 : CPPFLAGS += -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT)
ifneq (,$(filter Windows%,$(OS)))
zstd32 : $(RES32_FILE)
endif
zstd32 : $(ZSTDLIB_FULL_SRC) $(ZSTD_CLI_SRC)
$(CC) -m32 $(FLAGS) $^ -o $@$(EXT)
## zstd-nolegacy: same scope as zstd, with just support of legacy formats removed
zstd-nolegacy : LDFLAGS += $(THREAD_LD) $(ZLIBLD) $(LZMALD) $(LZ4LD) $(DEBUGFLAGS_LD)
zstd-nolegacy : $(ZSTDLIB_CORE_SRC) $(ZDICT_SRC) $(ZSTD_CLI_OBJ)
$(CC) $(FLAGS) $^ -o $@$(EXT) $(LDFLAGS)
.PHONY: zstd-nomt
zstd-nomt : THREAD_CPP :=
zstd-nomt : THREAD_LD :=
zstd-nomt : THREAD_MSG := - multi-threading disabled
zstd-nomt : zstd
.PHONY: zstd-nogz
zstd-nogz : ZLIBCPP :=
zstd-nogz : ZLIBLD :=
zstd-nogz : ZLIB_MSG := - gzip support is disabled
zstd-nogz : zstd
.PHONY: zstd-noxz
zstd-noxz : LZMACPP :=
zstd-noxz : LZMALD :=
zstd-noxz : LZMA_MSG := - xz/lzma support is disabled
zstd-noxz : zstd
## zstd-dll: zstd executable linked to dynamic library libzstd (must have same version)
.PHONY: zstd-dll
zstd-dll : LDFLAGS+= -L$(ZSTDDIR)
zstd-dll : LDLIBS += -lzstd
zstd-dll : ZSTDLIB_LOCAL_SRC = xxhash.c
zstd-dll : zstd
## zstd-pgo: zstd executable optimized with PGO.
.PHONY: zstd-pgo
zstd-pgo :
$(MAKE) clean
$(MAKE) zstd MOREFLAGS=-fprofile-generate
./zstd -b19i1 $(PROFILE_WITH)
./zstd -b16i1 $(PROFILE_WITH)
./zstd -b9i2 $(PROFILE_WITH)
./zstd -b $(PROFILE_WITH)
./zstd -b7i2 $(PROFILE_WITH)
./zstd -b5 $(PROFILE_WITH)
$(RM) zstd *.o
case $(CC) in *clang*) if ! [ -e default.profdata ]; then llvm-profdata merge -output=default.profdata default*.profraw; fi ;; esac
$(MAKE) zstd MOREFLAGS=-fprofile-use
## zstd-small: minimal target, supporting only zstd compression and decompression. no bench. no legacy. no other format.
zstd-small: CFLAGS = -Os -s
zstd-frugal zstd-small: $(ZSTDLIB_CORE_SRC) zstdcli.c util.c timefn.c fileio.c
$(CC) $(FLAGS) -DZSTD_NOBENCH -DZSTD_NODICT -DZSTD_NOTRACE $^ -o $@$(EXT)
zstd-decompress: $(ZSTDLIB_COMMON_C) $(ZSTDLIB_DECOMPRESS_C) zstdcli.c util.c timefn.c fileio.c
$(CC) $(FLAGS) -DZSTD_NOBENCH -DZSTD_NODICT -DZSTD_NOCOMPRESS -DZSTD_NOTRACE $^ -o $@$(EXT)
zstd-compress: $(ZSTDLIB_COMMON_C) $(ZSTDLIB_COMPRESS_C) zstdcli.c util.c timefn.c fileio.c
$(CC) $(FLAGS) -DZSTD_NOBENCH -DZSTD_NODICT -DZSTD_NODECOMPRESS -DZSTD_NOTRACE $^ -o $@$(EXT)
## zstd-dictBuilder: executable supporting dictionary creation and compression (only)
zstd-dictBuilder: CPPFLAGS += -DZSTD_NOBENCH -DZSTD_NODECOMPRESS -DZSTD_NOTRACE
zstd-dictBuilder: $(ZSTDLIB_COMMON_C) $(ZSTDLIB_COMPRESS_C) $(ZDICT_SRC) zstdcli.c util.c timefn.c fileio.c dibio.c
$(CC) $(FLAGS) $^ -o $@$(EXT)
zstdmt: zstd
ln -sf zstd zstdmt
.PHONY: generate_res
generate_res: $(RES64_FILE) $(RES32_FILE)
ifneq (,$(filter Windows%,$(OS)))
RC ?= windres
# http://stackoverflow.com/questions/708238/how-do-i-add-an-icon-to-a-mingw-gcc-compiled-executable
$(RES64_FILE): windres/zstd.rc
$(RC) -o $@ -I ../lib -I windres -i $< -O coff -F pe-x86-64
$(RES32_FILE): windres/zstd.rc
$(RC) -o $@ -I ../lib -I windres -i $< -O coff -F pe-i386
endif
.PHONY: clean
clean:
$(RM) core *.o tmp* result* *.gcda dictionary *.zst \
zstd$(EXT) zstd32$(EXT) zstd-dll$(EXT) \
zstd-compress$(EXT) zstd-decompress$(EXT) \
zstd-small$(EXT) zstd-frugal$(EXT) zstd-nolegacy$(EXT) zstd4$(EXT) \
zstd-dictBuilder$(EXT) \
*.gcda default*.profraw default.profdata have_zlib$(EXT)
$(RM) -r obj/*
@echo Cleaning completed
MD2ROFF = ronn
MD2ROFF_FLAGS = --roff --warnings --manual="User Commands" --organization="zstd $(ZSTD_VERSION)"
zstd.1: zstd.1.md ../lib/zstd.h
cat $< | $(MD2ROFF) $(MD2ROFF_FLAGS) | sed -n '/^\.\\\".*/!p' > $@
zstdgrep.1: zstdgrep.1.md ../lib/zstd.h
cat $< | $(MD2ROFF) $(MD2ROFF_FLAGS) | sed -n '/^\.\\\".*/!p' > $@
zstdless.1: zstdless.1.md ../lib/zstd.h
cat $< | $(MD2ROFF) $(MD2ROFF_FLAGS) | sed -n '/^\.\\\".*/!p' > $@
.PHONY: man
man: zstd.1 zstdgrep.1 zstdless.1
.PHONY: clean-man
clean-man:
$(RM) zstd.1
$(RM) zstdgrep.1
$(RM) zstdless.1
.PHONY: preview-man
preview-man: clean-man man
man ./zstd.1
man ./zstdgrep.1
man ./zstdless.1
# Generate .h dependencies automatically
DEPFLAGS = -MT $@ -MMD -MP -MF
$(BUILD_DIR)/%.o : %.c $(BUILD_DIR)/%.d | $(BUILD_DIR)
@echo CC $@
$(COMPILE.c) $(DEPFLAGS) $(BUILD_DIR)/$*.d $(OUTPUT_OPTION) $<
MKDIR ?= mkdir
$(BUILD_DIR): ; $(MKDIR) -p $@
DEPFILES := $(ZSTD_OBJ:.o=.d)
$(DEPFILES):
include $(wildcard $(DEPFILES))
#-----------------------------------------------------------------------------
# make install is validated only for Linux, macOS, BSD, Hurd and Solaris targets
#-----------------------------------------------------------------------------
ifneq (,$(filter $(UNAME),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS Haiku))
HAVE_COLORNEVER = $(shell echo a | egrep --color=never a > /dev/null 2> /dev/null && echo 1 || echo 0)
EGREP_OPTIONS ?=
ifeq ($(HAVE_COLORNEVER), 1)
EGREP_OPTIONS += --color=never
endif
EGREP = egrep $(EGREP_OPTIONS)
AWK = awk
# Print a two column output of targets and their description. To add a target description, put a
# comment in the Makefile with the format "## <TARGET>: <DESCRIPTION>". For example:
#
## list: Print all targets and their descriptions (if provided)
.PHONY: list
list:
TARGETS=$$($(MAKE) -pRrq -f $(lastword $(MAKEFILE_LIST)) : 2>/dev/null \
| $(AWK) -v RS= -F: '/^# File/,/^# Finished Make data base/ {if ($$1 !~ "^[#.]") {print $$1}}' \
| $(EGREP) -v -e '^[^[:alnum:]]' | sort); \
{ \
printf "Target Name\tDescription\n"; \
printf "%0.s-" {1..16}; printf "\t"; printf "%0.s-" {1..40}; printf "\n"; \
for target in $$TARGETS; do \
line=$$($(EGREP) "^##[[:space:]]+$$target:" $(lastword $(MAKEFILE_LIST))); \
description=$$(echo $$line | $(AWK) '{i=index($$0,":"); print substr($$0,i+1)}' | xargs); \
printf "$$target\t$$description\n"; \
done \
} | column -t -s $$'\t'
DESTDIR ?=
# directory variables : GNU conventions prefer lowercase
# see https://www.gnu.org/prep/standards/html_node/Makefile-Conventions.html
# support both lower and uppercase (BSD), use uppercase in script
prefix ?= /usr/local
PREFIX ?= $(prefix)
exec_prefix ?= $(PREFIX)
bindir ?= $(exec_prefix)/bin
BINDIR ?= $(bindir)
datarootdir ?= $(PREFIX)/share
mandir ?= $(datarootdir)/man
man1dir ?= $(mandir)/man1
ifneq (,$(filter $(UNAME),OpenBSD FreeBSD NetBSD DragonFly SunOS))
MANDIR ?= $(PREFIX)/man
MAN1DIR ?= $(MANDIR)/man1
else
MAN1DIR ?= $(man1dir)
endif
ifneq (,$(filter $(UNAME),SunOS))
INSTALL ?= ginstall
else
INSTALL ?= install
endif
INSTALL_PROGRAM ?= $(INSTALL)
INSTALL_SCRIPT ?= $(INSTALL_PROGRAM)
INSTALL_DATA ?= $(INSTALL) -m 644
INSTALL_MAN ?= $(INSTALL_DATA)
.PHONY: install
install:
# generate zstd only if not already present
[ -e zstd ] || $(MAKE) zstd-release
[ -e $(DESTDIR)$(BINDIR) ] || $(INSTALL) -d -m 755 $(DESTDIR)$(BINDIR)/
[ -e $(DESTDIR)$(MAN1DIR) ] || $(INSTALL) -d -m 755 $(DESTDIR)$(MAN1DIR)/
@echo Installing binaries
$(INSTALL_PROGRAM) zstd$(EXT) $(DESTDIR)$(BINDIR)/zstd$(EXT)
ln -sf zstd$(EXT) $(DESTDIR)$(BINDIR)/zstdcat$(EXT)
ln -sf zstd$(EXT) $(DESTDIR)$(BINDIR)/unzstd$(EXT)
ln -sf zstd$(EXT) $(DESTDIR)$(BINDIR)/zstdmt$(EXT)
$(INSTALL_SCRIPT) zstdless $(DESTDIR)$(BINDIR)/zstdless
$(INSTALL_SCRIPT) zstdgrep $(DESTDIR)$(BINDIR)/zstdgrep
@echo Installing man pages
$(INSTALL_MAN) zstd.1 $(DESTDIR)$(MAN1DIR)/zstd.1
ln -sf zstd.1 $(DESTDIR)$(MAN1DIR)/zstdcat.1
ln -sf zstd.1 $(DESTDIR)$(MAN1DIR)/unzstd.1
$(INSTALL_MAN) zstdgrep.1 $(DESTDIR)$(MAN1DIR)/zstdgrep.1
$(INSTALL_MAN) zstdless.1 $(DESTDIR)$(MAN1DIR)/zstdless.1
@echo zstd installation completed
.PHONY: uninstall
uninstall:
$(RM) $(DESTDIR)$(BINDIR)/zstdgrep
$(RM) $(DESTDIR)$(BINDIR)/zstdless
$(RM) $(DESTDIR)$(BINDIR)/zstdcat
$(RM) $(DESTDIR)$(BINDIR)/unzstd
$(RM) $(DESTDIR)$(BINDIR)/zstdmt
$(RM) $(DESTDIR)$(BINDIR)/zstd
$(RM) $(DESTDIR)$(MAN1DIR)/zstdless.1
$(RM) $(DESTDIR)$(MAN1DIR)/zstdgrep.1
$(RM) $(DESTDIR)$(MAN1DIR)/zstdcat.1
$(RM) $(DESTDIR)$(MAN1DIR)/unzstd.1
$(RM) $(DESTDIR)$(MAN1DIR)/zstd.1
@echo zstd programs successfully uninstalled
endif


@@ -0,0 +1,300 @@
Command Line Interface for Zstandard library
============================================
The Command Line Interface (CLI) can be built using the `make` command without any additional parameters.
There are, however, other Makefile targets that create different variations of the CLI:
- `zstd` : default CLI supporting gzip-like arguments; includes dictionary builder, benchmark, and supports decompression of legacy zstd formats
- `zstd-nolegacy` : Same as `zstd` but without support for legacy zstd formats
- `zstd-small` : CLI optimized for minimal size; no dictionary builder, no benchmark, and no support for legacy zstd formats
- `zstd-compress` : version of CLI which can only compress into zstd format
- `zstd-decompress` : version of CLI which can only decompress zstd format
### Compilation variables
`zstd` scope can be altered by modifying the following `make` variables :
- __HAVE_THREAD__ : multithreading is automatically enabled when `pthread` is detected.
It's possible to disable multithread support, by setting `HAVE_THREAD=0`.
Example : `make zstd HAVE_THREAD=0`
It's also possible to force multithread support, using `HAVE_THREAD=1`.
In which case, linking stage will fail if neither `pthread` nor `windows.h` library can be found.
This is useful to ensure this feature is not silently disabled.
- __ZSTD_LEGACY_SUPPORT__ : `zstd` can decompress files compressed by older versions of `zstd`.
Starting v0.8.0, all versions of `zstd` produce frames compliant with the [specification](../doc/zstd_compression_format.md), and are therefore compatible.
But older versions (< v0.8.0) produced different, incompatible frames.
By default, `zstd` supports decoding legacy formats >= v0.4.0 (`ZSTD_LEGACY_SUPPORT=4`).
This can be altered by modifying this compilation variable.
`ZSTD_LEGACY_SUPPORT=1` means "support all formats >= v0.1.0".
`ZSTD_LEGACY_SUPPORT=2` means "support all formats >= v0.2.0", and so on.
`ZSTD_LEGACY_SUPPORT=0` means _DO NOT_ support any legacy format.
If `ZSTD_LEGACY_SUPPORT >= 8`, it's the same as `0`, since there is no legacy format after `7`.
Note : `zstd` only supports decoding older formats, and cannot generate any legacy format.
- __HAVE_ZLIB__ : `zstd` can compress and decompress files in `.gz` format.
This is ordered through command `--format=gzip`.
Alternatively, symlinks named `gzip` or `gunzip` will mimic intended behavior.
`.gz` support is automatically enabled when `zlib` library is detected at build time.
It's possible to disable `.gz` support, by setting `HAVE_ZLIB=0`.
Example : `make zstd HAVE_ZLIB=0`
It's also possible to force compilation with zlib support, using `HAVE_ZLIB=1`.
In which case, linking stage will fail if `zlib` library cannot be found.
This is useful to prevent silent feature disabling.
- __HAVE_LZMA__ : `zstd` can compress and decompress files in `.xz` and `.lzma` formats.
This is ordered through commands `--format=xz` and `--format=lzma` respectively.
Alternatively, symlinks named `xz`, `unxz`, `lzma`, or `unlzma` will mimic intended behavior.
`.xz` and `.lzma` support is automatically enabled when `lzma` library is detected at build time.
It's possible to disable `.xz` and `.lzma` support, by setting `HAVE_LZMA=0`.
Example : `make zstd HAVE_LZMA=0`
It's also possible to force compilation with lzma support, using `HAVE_LZMA=1`.
In which case, linking stage will fail if `lzma` library cannot be found.
This is useful to prevent silent feature disabling.
- __HAVE_LZ4__ : `zstd` can compress and decompress files in `.lz4` format.
This is ordered through command `--format=lz4`.
Alternatively, symlinks named `lz4`, or `unlz4` will mimic intended behavior.
`.lz4` support is automatically enabled when `lz4` library is detected at build time.
It's possible to disable `.lz4` support, by setting `HAVE_LZ4=0` .
Example : `make zstd HAVE_LZ4=0`
It's also possible to force compilation with lz4 support, using `HAVE_LZ4=1`.
In which case, linking stage will fail if `lz4` library cannot be found.
This is useful to prevent silent feature disabling.
- __ZSTD_NOBENCH__ : `zstd` cli will be compiled without its integrated benchmark module.
This can be useful to produce smaller binaries.
In this case, the corresponding unit can also be excluded from compilation target.
- __ZSTD_NODICT__ : `zstd` cli will be compiled without support for the integrated dictionary builder.
This can be useful to produce smaller binaries.
In this case, the corresponding unit can also be excluded from compilation target.
- __ZSTD_NOCOMPRESS__ : `zstd` cli will be compiled without support for compression.
The resulting binary will only be able to decompress files.
This can be useful to produce smaller binaries.
A corresponding `Makefile` target using this ability is `zstd-decompress`.
- __ZSTD_NODECOMPRESS__ : `zstd` cli will be compiled without support for decompression.
The resulting binary will only be able to compress files.
This can be useful to produce smaller binaries.
A corresponding `Makefile` target using this ability is `zstd-compress`.
- __BACKTRACE__ : `zstd` can display a stack backtrace when execution
generates a runtime exception. By default, this feature may be
degraded/disabled on some platforms unless additional compiler directives are
applied. When triaging a runtime issue, enabling this feature can provide
more context to determine the location of the fault.
Example : `make zstd BACKTRACE=1`
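These variables can also be combined in a single invocation. A small illustrative sketch (the exact set of disabled features is arbitrary), building a trimmed-down binary without gzip, xz/lzma, lz4, or legacy-format support:
```
make zstd HAVE_ZLIB=0 HAVE_LZMA=0 HAVE_LZ4=0 ZSTD_LEGACY_SUPPORT=0
```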
### Aggregation of parameters
The CLI supports aggregation of parameters, i.e. `-b1`, `-e18`, and `-i1` can be joined into `-b1e18i1`.
### Symlink shortcuts
It's possible to invoke `zstd` through a symlink.
When the name of the symlink has a specific value, it triggers an associated behavior.
- `zstdmt` : compress using all cores available on local system.
- `zcat` : will decompress and output target file using any of the supported formats. `gzcat` and `zstdcat` are also equivalent.
- `gzip` : if zlib support is enabled, will mimic `gzip` by compressing file using `.gz` format, removing source file by default (use `--keep` to preserve). If zlib is not supported, triggers an error.
- `xz` : if lzma support is enabled, will mimic `xz` by compressing file using `.xz` format, removing source file by default (use `--keep` to preserve). If xz is not supported, triggers an error.
- `lzma` : if lzma support is enabled, will mimic `lzma` by compressing file using `.lzma` format, removing source file by default (use `--keep` to preserve). If lzma is not supported, triggers an error.
- `lz4` : if lz4 support is enabled, will mimic `lz4` by compressing file using `.lz4` format. If lz4 is not supported, triggers an error.
- `unzstd` and `unlz4` will decompress any of the supported formats.
- `ungz`, `unxz` and `unlzma` will do the same, and will also remove the source file by default (use `--keep` to preserve).
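A tiny sketch of the symlink mechanism (assuming a freshly built `zstd` binary in the current directory), using the `zstdmt` name listed above:
```
ln -sf zstd zstdmt
./zstdmt file        # compresses file using all available cores
```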
### Dictionary builder in Command Line Interface
Zstd offers a training mode, which can be used to tune the algorithm for a selected
type of data, by providing it with a few samples. The result of the training is stored
in a file selected with the `-o` option (default name is `dictionary`),
which can be loaded before compression and decompression.
Using a dictionary, the compression ratio achievable on small data improves dramatically.
These compression gains are achieved while simultaneously providing faster compression and decompression speeds.
Dictionaries work if there is some correlation in a family of small data samples (there is no universal dictionary).
Hence, deploying one dictionary per type of data will provide the greatest benefits.
Dictionary gains are mostly effective in the first few KB. Then, the compression algorithm
will rely more and more on previously decoded content to compress the rest of the file.
Usage of the dictionary builder and created dictionaries with CLI:
1. Create the dictionary : `zstd --train PathToTrainingSet/* -o dictionaryName`
2. Compress with the dictionary: `zstd FILE -D dictionaryName`
3. Decompress with the dictionary: `zstd --decompress FILE.zst -D dictionaryName`
### Benchmark in Command Line Interface
The CLI includes an in-memory compression benchmark module for zstd.
The benchmark is conducted using the given filenames. The files are read into memory and joined together.
This makes the benchmark more precise, as it eliminates I/O overhead.
Multiple filenames can be supplied, as multiple parameters, with wildcards,
or directory names can be used as parameters with the `-r` option.
The benchmark measures ratio, compressed size, compression and decompression speed.
One can select compression levels starting from `-b` and ending with `-e`.
The `-i` parameter selects the minimal evaluation time used for each tested level.
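For instance, a hedged example (file names are placeholders) that benchmarks levels 1 through 5, spending at least 3 seconds on each level:
```
zstd -b1 -e5 -i3 file1 file2
```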
### Usage of Command Line Interface
The full list of options can be obtained with `-h` or `-H` parameter:
```
Usage :
zstd [args] [FILE(s)] [-o file]
FILE : a filename
with no FILE, or when FILE is - , read standard input
Arguments :
-# : # compression level (1-19, default: 3)
-d : decompression
-D DICT: use DICT as Dictionary for compression or decompression
-o file: result stored into `file` (only 1 output file)
-f : overwrite output without prompting, also (de)compress links
--rm : remove source file(s) after successful de/compression
-k : preserve source file(s) (default)
-h/-H : display help/long help and exit
Advanced arguments :
-V : display Version number and exit
-c : force write to standard output, even if it is the console
-v : verbose mode; specify multiple times to increase verbosity
-q : suppress warnings; specify twice to suppress errors too
--no-progress : do not display the progress counter
-r : operate recursively on directories
--filelist FILE : read list of files to operate upon from FILE
--output-dir-flat DIR : processed files are stored into DIR
--output-dir-mirror DIR : processed files are stored into DIR respecting original directory structure
--[no-]check : during compression, add XXH64 integrity checksum to frame (default: enabled). If specified with -d, decompressor will ignore/validate checksums in compressed frame (default: validate).
-- : All arguments after "--" are treated as files
Advanced compression arguments :
--ultra : enable levels beyond 19, up to 22 (requires more memory)
--long[=#]: enable long distance matching with given window log (default: 27)
--fast[=#]: switch to very fast compression levels (default: 1)
--adapt : dynamically adapt compression level to I/O conditions
-T# : spawns # compression threads (default: 1, 0==# cores)
-B# : select size of each job (default: 0==automatic)
--single-thread : use a single thread for both I/O and compression (result slightly different than -T1)
--rsyncable : compress using a rsync-friendly method (-B sets block size)
--exclude-compressed: only compress files that are not already compressed
--stream-size=# : specify size of streaming input from `stdin`
--size-hint=# optimize compression parameters for streaming input of approximately this size
--target-compressed-block-size=# : generate compressed block of approximately targeted size
--no-dictID : don't write dictID into header (dictionary compression only)
--[no-]compress-literals : force (un)compressed literals
--format=zstd : compress files to the .zst format (default)
--format=gzip : compress files to the .gz format
--format=xz : compress files to the .xz format
--format=lzma : compress files to the .lzma format
--format=lz4 : compress files to the .lz4 format
Advanced decompression arguments :
-l : print information about zstd compressed files
--test : test compressed file integrity
-M# : Set a memory usage limit for decompression
--[no-]sparse : sparse mode (default: disabled)
Dictionary builder :
--train ## : create a dictionary from a training set of files
--train-cover[=k=#,d=#,steps=#,split=#,shrink[=#]] : use the cover algorithm with optional args
--train-fastcover[=k=#,d=#,f=#,steps=#,split=#,accel=#,shrink[=#]] : use the fast cover algorithm with optional args
--train-legacy[=s=#] : use the legacy algorithm with selectivity (default: 9)
-o DICT : DICT is dictionary name (default: dictionary)
--maxdict=# : limit dictionary to specified size (default: 112640)
--dictID=# : force dictionary ID to specified value (default: random)
Benchmark arguments :
-b# : benchmark file(s), using # compression level (default: 3)
-e# : test all compression levels successively from -b# to -e# (default: 1)
-i# : minimum evaluation time in seconds (default: 3s)
-B# : cut file into independent blocks of size # (default: no block)
-S : output one benchmark result per input file (default: consolidated result)
--priority=rt : set process priority to real-time
```
### Passing parameters through Environment Variables
There is no "generic" way to pass "any kind of parameter" to `zstd` in a pass-through manner.
Using environment variables for this purpose has security implications.
Therefore, this avenue is intentionally restricted and only supports `ZSTD_CLEVEL` and `ZSTD_NBTHREADS`.
`ZSTD_CLEVEL` can be used to modify the default compression level of `zstd`
(usually set to `3`) to another value between 1 and 19 (the "normal" range).
`ZSTD_NBTHREADS` can be used to specify a number of threads
that `zstd` will use for compression, which by default is `1`.
This functionality only exists when `zstd` is compiled with multithread support.
`0` means "use as many threads as detected cpu cores on local system".
The max # of threads is capped at `ZSTDMT_NBWORKERS_MAX`,
which is either 64 in 32-bit mode, or 256 for 64-bit environments.
This functionality can be useful when `zstd` CLI is invoked in a way that doesn't allow passing arguments.
One such scenario is `tar --zstd`.
As `ZSTD_CLEVEL` and `ZSTD_NBTHREADS` only replace the default compression level
and number of threads respectively, they can both be overridden by corresponding command line arguments:
`-#` for compression level and `-T#` for number of threads.
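A short sketch of the `tar --zstd` scenario mentioned above (archive and directory names are placeholders; assumes a multithread-enabled `zstd` and a GNU tar with `--zstd` support), where these environment variables are the only available tuning knobs:
```
ZSTD_CLEVEL=19 ZSTD_NBTHREADS=4 tar --zstd -cf archive.tar.zst some_directory/
```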
### Long distance matching mode
The long distance matching mode, enabled with `--long`, is designed to improve
the compression ratio for files with long matches at a large distance (up to the
maximum window size, `128 MiB`) while still maintaining compression speed.
Enabling this mode sets the window size to `128 MiB` and thus increases the memory
usage for both the compressor and decompressor. Performance in terms of speed is
dependent on long matches being found. Compression speed may degrade if few long
matches are found. Decompression speed usually improves when there are many long
distance matches.
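For example (the file name is a placeholder), long distance matching can be combined with a regular compression level:
```
zstd --long -5 big.tar        # 128 MiB window, output written to big.tar.zst
```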
Below are graphs comparing the compression speed, compression ratio, and
decompression speed with and without long distance matching on an ideal use
case: a tar of four versions of clang (versions `3.4.1`, `3.4.2`, `3.5.0`,
`3.5.1`) with a total size of `244889600 B`. This is an ideal use case as there
are many long distance matches within the maximum window size of `128 MiB` (each
version is less than `128 MiB`).
Compression Speed vs Ratio | Decompression Speed
---------------------------|---------------------
![Compression Speed vs Ratio](https://raw.githubusercontent.com/facebook/zstd/v1.3.3/doc/images/ldmCspeed.png "Compression Speed vs Ratio") | ![Decompression Speed](https://raw.githubusercontent.com/facebook/zstd/v1.3.3/doc/images/ldmDspeed.png "Decompression Speed")
| Method | Compression ratio | Compression speed | Decompression speed |
|:-------|------------------:|-------------------------:|---------------------------:|
| `zstd -1` | `5.065` | `284.8 MB/s` | `759.3 MB/s` |
| `zstd -5` | `5.826` | `124.9 MB/s` | `674.0 MB/s` |
| `zstd -10` | `6.504` | `29.5 MB/s` | `771.3 MB/s` |
| `zstd -1 --long` | `17.426` | `220.6 MB/s` | `1638.4 MB/s` |
| `zstd -5 --long` | `19.661` | `165.5 MB/s` | `1530.6 MB/s` |
| `zstd -10 --long`| `21.949` | `75.6 MB/s` | `1632.6 MB/s` |
On this file, the compression ratio improves significantly with minimal impact
on compression speed, and the decompression speed doubles.
On the other extreme, compressing a file with few long distance matches (such as
the [Silesia compression corpus]) will likely lead to a deterioration in
compression speed (for lower levels) with minimal change in compression ratio.
The below table illustrates this on the [Silesia compression corpus].
[Silesia compression corpus]: http://sun.aei.polsl.pl/~sdeor/index.php?page=silesia
| Method | Compression ratio | Compression speed | Decompression speed |
|:-------|------------------:|------------------:|---------------------:|
| `zstd -1` | `2.878` | `231.7 MB/s` | `594.4 MB/s` |
| `zstd -1 --long` | `2.929` | `106.5 MB/s` | `517.9 MB/s` |
| `zstd -5` | `3.274` | `77.1 MB/s` | `464.2 MB/s` |
| `zstd -5 --long` | `3.319` | `51.7 MB/s` | `371.9 MB/s` |
| `zstd -10` | `3.523` | `16.4 MB/s` | `489.2 MB/s` |
| `zstd -10 --long`| `3.566` | `16.2 MB/s` | `415.7 MB/s` |
### zstdgrep
`zstdgrep` is a utility which makes it possible to run `grep` directly on a `.zst` compressed file.
It's used the same way as normal `grep`, for example :
`zstdgrep pattern file.zst`
`zstdgrep` is _not_ compatible with dictionary compression.
To search into a file compressed with a dictionary,
it's necessary to decompress it using `zstd` or `zstdcat`,
and then pipe the result to `grep`. For example :
`zstdcat -D dictionary -qc -- file.zst | grep pattern`


@@ -0,0 +1,256 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
/* *************************************
* Includes
***************************************/
#include <stdlib.h> /* malloc, free */
#include <string.h> /* memset */
#include <assert.h> /* assert */
#include "timefn.h" /* UTIL_time_t, UTIL_getTime */
#include "benchfn.h"
/* *************************************
* Constants
***************************************/
#define TIMELOOP_MICROSEC SEC_TO_MICRO /* 1 second */
#define TIMELOOP_NANOSEC (1*1000000000ULL) /* 1 second */
#define KB *(1 <<10)
#define MB *(1 <<20)
#define GB *(1U<<30)
/* *************************************
* Debug errors
***************************************/
#if defined(DEBUG) && (DEBUG >= 1)
# include <stdio.h> /* fprintf */
# define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
# define DEBUGOUTPUT(...) { if (DEBUG) DISPLAY(__VA_ARGS__); }
#else
# define DEBUGOUTPUT(...)
#endif
/* error without displaying */
#define RETURN_QUIET_ERROR(retValue, ...) { \
DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__); \
DEBUGOUTPUT("Error : "); \
DEBUGOUTPUT(__VA_ARGS__); \
DEBUGOUTPUT(" \n"); \
return retValue; \
}
/* Abort execution if a condition is not met */
#define CONTROL(c) { if (!(c)) { DEBUGOUTPUT("error: %s \n", #c); abort(); } }
/* *************************************
* Benchmarking an arbitrary function
***************************************/
int BMK_isSuccessful_runOutcome(BMK_runOutcome_t outcome)
{
return outcome.error_tag_never_ever_use_directly == 0;
}
/* warning : this function will stop program execution if outcome is invalid !
* check outcome validity first, using BMK_isValid_runResult() */
BMK_runTime_t BMK_extract_runTime(BMK_runOutcome_t outcome)
{
CONTROL(outcome.error_tag_never_ever_use_directly == 0);
return outcome.internal_never_ever_use_directly;
}
size_t BMK_extract_errorResult(BMK_runOutcome_t outcome)
{
CONTROL(outcome.error_tag_never_ever_use_directly != 0);
return outcome.error_result_never_ever_use_directly;
}
static BMK_runOutcome_t BMK_runOutcome_error(size_t errorResult)
{
BMK_runOutcome_t b;
memset(&b, 0, sizeof(b));
b.error_tag_never_ever_use_directly = 1;
b.error_result_never_ever_use_directly = errorResult;
return b;
}
static BMK_runOutcome_t BMK_setValid_runTime(BMK_runTime_t runTime)
{
BMK_runOutcome_t outcome;
outcome.error_tag_never_ever_use_directly = 0;
outcome.internal_never_ever_use_directly = runTime;
return outcome;
}
/* initFn will be measured once, benchFn will be measured `nbLoops` times */
/* initFn is optional, provide NULL if none */
/* benchFn must return a size_t value that errorFn can interpret */
/* takes # of blocks and list of size & stuff for each. */
/* can report result of benchFn for each block into blockResult. */
/* blockResult is optional, provide NULL if this information is not required */
/* note : time per loop can be reported as zero if run time < timer resolution */
BMK_runOutcome_t BMK_benchFunction(BMK_benchParams_t p,
unsigned nbLoops)
{
size_t dstSize = 0;
nbLoops += !nbLoops; /* minimum nbLoops is 1 */
/* init */
{ size_t i;
for(i = 0; i < p.blockCount; i++) {
memset(p.dstBuffers[i], 0xE5, p.dstCapacities[i]); /* warm up and erase result buffer */
} }
/* benchmark */
{ UTIL_time_t const clockStart = UTIL_getTime();
unsigned loopNb, blockNb;
if (p.initFn != NULL) p.initFn(p.initPayload);
for (loopNb = 0; loopNb < nbLoops; loopNb++) {
for (blockNb = 0; blockNb < p.blockCount; blockNb++) {
size_t const res = p.benchFn(p.srcBuffers[blockNb], p.srcSizes[blockNb],
p.dstBuffers[blockNb], p.dstCapacities[blockNb],
p.benchPayload);
if (loopNb == 0) {
if (p.blockResults != NULL) p.blockResults[blockNb] = res;
if ((p.errorFn != NULL) && (p.errorFn(res))) {
RETURN_QUIET_ERROR(BMK_runOutcome_error(res),
"Function benchmark failed on block %u (of size %u) with error %i",
blockNb, (unsigned)p.srcSizes[blockNb], (int)res);
}
dstSize += res;
} }
} /* for (loopNb = 0; loopNb < nbLoops; loopNb++) */
{ PTime const totalTime = UTIL_clockSpanNano(clockStart);
BMK_runTime_t rt;
rt.nanoSecPerRun = (double)totalTime / nbLoops;
rt.sumOfReturn = dstSize;
return BMK_setValid_runTime(rt);
} }
}
/* ==== Benchmarking any function, providing intermediate results ==== */
struct BMK_timedFnState_s {
PTime timeSpent_ns;
PTime timeBudget_ns;
PTime runBudget_ns;
BMK_runTime_t fastestRun;
unsigned nbLoops;
UTIL_time_t coolTime;
}; /* typedef'd to BMK_timedFnState_t within bench.h */
BMK_timedFnState_t* BMK_createTimedFnState(unsigned total_ms, unsigned run_ms)
{
BMK_timedFnState_t* const r = (BMK_timedFnState_t*)malloc(sizeof(*r));
if (r == NULL) return NULL; /* malloc() error */
BMK_resetTimedFnState(r, total_ms, run_ms);
return r;
}
void BMK_freeTimedFnState(BMK_timedFnState_t* state) { free(state); }
BMK_timedFnState_t*
BMK_initStatic_timedFnState(void* buffer, size_t size, unsigned total_ms, unsigned run_ms)
{
typedef char check_size[ 2 * (sizeof(BMK_timedFnState_shell) >= sizeof(struct BMK_timedFnState_s)) - 1]; /* static assert : a compilation failure indicates that BMK_timedFnState_shell is not large enough */
typedef struct { check_size c; BMK_timedFnState_t tfs; } tfs_align; /* force tfs to be aligned at its next best position */
size_t const tfs_alignment = offsetof(tfs_align, tfs); /* provides the minimal alignment restriction for BMK_timedFnState_t */
BMK_timedFnState_t* const r = (BMK_timedFnState_t*)buffer;
if (buffer == NULL) return NULL;
if (size < sizeof(struct BMK_timedFnState_s)) return NULL;
if ((size_t)buffer % tfs_alignment) return NULL; /* buffer must be properly aligned */
BMK_resetTimedFnState(r, total_ms, run_ms);
return r;
}
void BMK_resetTimedFnState(BMK_timedFnState_t* timedFnState, unsigned total_ms, unsigned run_ms)
{
if (!total_ms) total_ms = 1 ;
if (!run_ms) run_ms = 1;
if (run_ms > total_ms) run_ms = total_ms;
timedFnState->timeSpent_ns = 0;
timedFnState->timeBudget_ns = (PTime)total_ms * TIMELOOP_NANOSEC / 1000;
timedFnState->runBudget_ns = (PTime)run_ms * TIMELOOP_NANOSEC / 1000;
timedFnState->fastestRun.nanoSecPerRun = (double)TIMELOOP_NANOSEC * 2000000000; /* hopefully large enough : must be larger than any potential measurement */
timedFnState->fastestRun.sumOfReturn = (size_t)(-1LL);
timedFnState->nbLoops = 1;
timedFnState->coolTime = UTIL_getTime();
}
/* Tells if nb of seconds set in timedFnState for all runs is spent.
* note : this function will return 1 if BMK_benchFunctionTimed() has actually errored. */
int BMK_isCompleted_TimedFn(const BMK_timedFnState_t* timedFnState)
{
return (timedFnState->timeSpent_ns >= timedFnState->timeBudget_ns);
}
#undef MIN
#define MIN(a,b) ( (a) < (b) ? (a) : (b) )
#define MINUSABLETIME (TIMELOOP_NANOSEC / 2) /* 0.5 seconds */
BMK_runOutcome_t BMK_benchTimedFn(BMK_timedFnState_t* cont,
BMK_benchParams_t p)
{
PTime const runBudget_ns = cont->runBudget_ns;
PTime const runTimeMin_ns = runBudget_ns / 2;
int completed = 0;
BMK_runTime_t bestRunTime = cont->fastestRun;
while (!completed) {
BMK_runOutcome_t const runResult = BMK_benchFunction(p, cont->nbLoops);
if(!BMK_isSuccessful_runOutcome(runResult)) { /* error : move out */
return runResult;
}
{ BMK_runTime_t const newRunTime = BMK_extract_runTime(runResult);
double const loopDuration_ns = newRunTime.nanoSecPerRun * cont->nbLoops;
cont->timeSpent_ns += (unsigned long long)loopDuration_ns;
/* estimate nbLoops for next run to last approximately 1 second */
if (loopDuration_ns > (runBudget_ns / 50)) {
double const fastestRun_ns = MIN(bestRunTime.nanoSecPerRun, newRunTime.nanoSecPerRun);
cont->nbLoops = (unsigned)(runBudget_ns / fastestRun_ns) + 1;
} else {
/* previous run was too short : blindly increase workload by x multiplier */
const unsigned multiplier = 10;
assert(cont->nbLoops < ((unsigned)-1) / multiplier); /* avoid overflow */
cont->nbLoops *= multiplier;
}
if(loopDuration_ns < runTimeMin_ns) {
/* don't report results for which benchmark run time was too small : increased risks of rounding errors */
assert(completed == 0);
continue;
} else {
if(newRunTime.nanoSecPerRun < bestRunTime.nanoSecPerRun) {
bestRunTime = newRunTime;
}
completed = 1;
}
}
} /* while (!completed) */
return BMK_setValid_runTime(bestRunTime);
}


@@ -0,0 +1,183 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
/* benchfn :
* benchmark any function on a set of input
* providing result in nanoSecPerRun
* or detecting and returning an error
*/
#if defined (__cplusplus)
extern "C" {
#endif
#ifndef BENCH_FN_H_23876
#define BENCH_FN_H_23876
/* === Dependencies === */
#include <stddef.h> /* size_t */
/* ==== Benchmark any function, iterated on a set of blocks ==== */
/* BMK_runTime_t: valid result return type */
typedef struct {
double nanoSecPerRun; /* time per iteration (over all blocks) */
size_t sumOfReturn; /* sum of return values */
} BMK_runTime_t;
/* BMK_runOutcome_t:
* type expressing the outcome of a benchmark run by BMK_benchFunction(),
* which can be either valid or invalid.
* benchmark outcome can be invalid if errorFn is provided.
* BMK_runOutcome_t must be considered "opaque" : never access its members directly.
* Instead, use its assigned methods :
* BMK_isSuccessful_runOutcome, BMK_extract_runTime, BMK_extract_errorResult.
* The structure is only described here to allow its allocation on stack. */
typedef struct {
BMK_runTime_t internal_never_ever_use_directly;
size_t error_result_never_ever_use_directly;
int error_tag_never_ever_use_directly;
} BMK_runOutcome_t;
/* prototypes for benchmarked functions */
typedef size_t (*BMK_benchFn_t)(const void* src, size_t srcSize, void* dst, size_t dstCapacity, void* customPayload);
typedef size_t (*BMK_initFn_t)(void* initPayload);
typedef unsigned (*BMK_errorFn_t)(size_t);
/* BMK_benchFunction() parameters are provided via the following structure.
* A structure is preferable for readability,
* as the number of parameters required is fairly large.
* No initializer is provided, because it doesn't make sense to provide some "default" :
* all parameters must be specified by the caller.
* optional parameters are labelled explicitly, and accept value NULL when not used */
typedef struct {
BMK_benchFn_t benchFn; /* the function to benchmark, over the set of blocks */
void* benchPayload; /* pass custom parameters to benchFn :
* (*benchFn)(srcBuffers[i], srcSizes[i], dstBuffers[i], dstCapacities[i], benchPayload) */
BMK_initFn_t initFn; /* (*initFn)(initPayload) is run once per run, at the beginning. */
void* initPayload; /* Both arguments can be NULL, in which case nothing is run. */
BMK_errorFn_t errorFn; /* errorFn will check each return value of benchFn over each block, to determine if it failed or not.
* errorFn can be NULL, in which case no check is performed.
* errorFn must return 0 when benchFn was successful, and >= 1 if it detects an error.
* Execution is stopped as soon as an error is detected.
* the triggering return value can be retrieved using BMK_extract_errorResult(). */
size_t blockCount; /* number of blocks to operate benchFn on.
* It's also the size of all array parameters :
* srcBuffers, srcSizes, dstBuffers, dstCapacities, blockResults */
const void *const * srcBuffers; /* read-only array of buffers to be operated on by benchFn */
const size_t* srcSizes; /* read-only array containing sizes of srcBuffers */
void *const * dstBuffers; /* array of buffers to be written into by benchFn. This array is not optional, it must be provided even if unused by benchfn. */
const size_t* dstCapacities; /* read-only array containing capacities of dstBuffers. This array must be present. */
size_t* blockResults; /* Optional: store the return value of benchFn for each block. Use NULL if this result is not requested. */
} BMK_benchParams_t;
/* BMK_benchFunction() :
* This function benchmarks benchFn and initFn, providing a result.
*
* params : see description of BMK_benchParams_t above.
* nbLoops: defines number of times benchFn is run over the full set of blocks.
* Minimum value is 1. A 0 is interpreted as a 1.
*
* @return: can express either an error or a successful result.
* Use BMK_isSuccessful_runOutcome() to check if benchmark was successful.
* If yes, extract the result with BMK_extract_runTime(),
* it will contain :
* .sumOfReturn : the sum of all return values of benchFn through all of blocks
* .nanoSecPerRun : time per run of benchFn + (time for initFn / nbLoops)
* .sumOfReturn is generally intended for functions which return a # of bytes written into dstBuffer,
* in which case, this value will be the total amount of bytes written into dstBuffer.
*
* blockResults : when provided (!= NULL), and when benchmark is successful,
* params.blockResults contains all return values of `benchFn` over all blocks.
* when provided (!= NULL), and when benchmark failed,
* params.blockResults contains return values of `benchFn` over all blocks preceding and including the failed block.
*/
BMK_runOutcome_t BMK_benchFunction(BMK_benchParams_t params, unsigned nbLoops);
/* check first if the benchmark was successful or not */
int BMK_isSuccessful_runOutcome(BMK_runOutcome_t outcome);
/* If the benchmark was successful, extract the result.
* note : this function will abort() program execution if benchmark failed !
* always check if benchmark was successful first !
*/
BMK_runTime_t BMK_extract_runTime(BMK_runOutcome_t outcome);
/* when benchmark failed, it means one invocation of `benchFn` failed.
* The failure was detected by `errorFn`, operating on return values of `benchFn`.
* Returns the faulty return value.
* note : this function will abort() program execution if the benchmark did not fail.
* always check if benchmark failed first !
*/
size_t BMK_extract_errorResult(BMK_runOutcome_t outcome);
/* ==== Benchmark any function, returning intermediate results ==== */
/* state information tracking benchmark session */
typedef struct BMK_timedFnState_s BMK_timedFnState_t;
/* BMK_benchTimedFn() :
* Similar to BMK_benchFunction(), most arguments being identical.
* Automatically determines `nbLoops` so that each result is regularly produced at interval of about run_ms.
* Note : minimum `nbLoops` is 1, therefore a run may last more than run_ms, and possibly even more than total_ms.
* Usage - initialize timedFnState, select benchmark duration (total_ms) and each measurement duration (run_ms)
* call BMK_benchTimedFn() repetitively, each measurement is supposed to last about run_ms
* Check if total time budget is spent or exceeded, using BMK_isCompleted_TimedFn()
*/
BMK_runOutcome_t BMK_benchTimedFn(BMK_timedFnState_t* timedFnState,
BMK_benchParams_t params);
/* Tells if duration of all benchmark runs has exceeded total_ms
*/
int BMK_isCompleted_TimedFn(const BMK_timedFnState_t* timedFnState);
/* BMK_createTimedFnState() and BMK_resetTimedFnState() :
* Create/Set BMK_timedFnState_t for next benchmark session,
* which shall last a minimum of total_ms milliseconds,
* producing intermediate results, paced at interval of (approximately) run_ms.
*/
BMK_timedFnState_t* BMK_createTimedFnState(unsigned total_ms, unsigned run_ms);
void BMK_resetTimedFnState(BMK_timedFnState_t* timedFnState, unsigned total_ms, unsigned run_ms);
void BMK_freeTimedFnState(BMK_timedFnState_t* state);
/* BMK_timedFnState_shell and BMK_initStatic_timedFnState() :
* Makes it possible to statically allocate a BMK_timedFnState_t on stack.
* BMK_timedFnState_shell is only there to allocate space,
* never ever access its members.
* BMK_initStatic_timedFnState() actually accepts any buffer.
* It will check if provided buffer is large enough and is correctly aligned,
* and will return NULL if conditions are not respected.
*/
#define BMK_TIMEDFNSTATE_SIZE 64
typedef union {
char never_access_space[BMK_TIMEDFNSTATE_SIZE];
long long alignment_enforcer; /* must be aligned on 8-bytes boundaries */
} BMK_timedFnState_shell;
BMK_timedFnState_t* BMK_initStatic_timedFnState(void* buffer, size_t size, unsigned total_ms, unsigned run_ms);
#endif /* BENCH_FN_H_23876 */
#if defined (__cplusplus)
}
#endif
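To make the header's contract concrete, here is a minimal, self-contained usage sketch (not part of this commit; `copyFn`, the buffer sizes, and the loop count are made up for illustration, and `benchfn.h` is assumed to be on the include path). It drives BMK_benchFunction() exactly as the comments above describe, with all optional fields left NULL:
```
/* illustrative only: not part of the vendored sources */
#include <stdio.h>
#include <string.h>
#include "benchfn.h"

/* stand-in "codec": copies src to dst and reports bytes written */
static size_t copyFn(const void* src, size_t srcSize,
                     void* dst, size_t dstCapacity, void* payload)
{
    (void)payload; (void)dstCapacity;
    memcpy(dst, src, srcSize);
    return srcSize;
}

int main(void)
{
    static char src[4096];
    static char dst[4096];
    const void* const srcBuffers[1]    = { src };
    void* const       dstBuffers[1]    = { dst };
    size_t const      srcSizes[1]      = { sizeof(src) };
    size_t const      dstCapacities[1] = { sizeof(dst) };

    BMK_benchParams_t p;
    memset(&p, 0, sizeof(p));   /* optional fields (initFn, errorFn, blockResults) stay NULL */
    p.benchFn       = copyFn;
    p.blockCount    = 1;
    p.srcBuffers    = srcBuffers;
    p.srcSizes      = srcSizes;
    p.dstBuffers    = dstBuffers;
    p.dstCapacities = dstCapacities;

    {   BMK_runOutcome_t const outcome = BMK_benchFunction(p, 100 /* nbLoops */);
        if (BMK_isSuccessful_runOutcome(outcome)) {
            BMK_runTime_t const rt = BMK_extract_runTime(outcome);
            printf("%.1f ns per run, %u bytes written in total\n",
                   rt.nanoSecPerRun, (unsigned)rt.sumOfReturn);
        } else {
            printf("benchmark failed, benchFn returned %u\n",
                   (unsigned)BMK_extract_errorResult(outcome));
        }
    }
    return 0;
}
```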


@@ -0,0 +1,882 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
/* **************************************
* Tuning parameters
****************************************/
#ifndef BMK_TIMETEST_DEFAULT_S /* default minimum time per test */
#define BMK_TIMETEST_DEFAULT_S 3
#endif
/* *************************************
* Includes
***************************************/
#include "platform.h" /* Large Files support */
#include "util.h" /* UTIL_getFileSize, UTIL_sleep */
#include <stdlib.h> /* malloc, free */
#include <string.h> /* memset, strerror */
#include <stdio.h> /* fprintf, fopen */
#include <errno.h>
#include <assert.h> /* assert */
#include "timefn.h" /* UTIL_time_t */
#include "benchfn.h"
#include "../lib/common/mem.h"
#define ZSTD_STATIC_LINKING_ONLY
#include "../lib/zstd.h"
#include "datagen.h" /* RDG_genBuffer */
#include "../lib/common/xxhash.h"
#include "benchzstd.h"
#include "../lib/zstd_errors.h"
/* *************************************
* Constants
***************************************/
#ifndef ZSTD_GIT_COMMIT
# define ZSTD_GIT_COMMIT_STRING ""
#else
# define ZSTD_GIT_COMMIT_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_GIT_COMMIT)
#endif
#define TIMELOOP_MICROSEC (1*1000000ULL) /* 1 second */
#define TIMELOOP_NANOSEC (1*1000000000ULL) /* 1 second */
#define ACTIVEPERIOD_MICROSEC (70*TIMELOOP_MICROSEC) /* 70 seconds */
#define COOLPERIOD_SEC 10
#define KB *(1 <<10)
#define MB *(1 <<20)
#define GB *(1U<<30)
#define BMK_RUNTEST_DEFAULT_MS 1000
static const size_t maxMemory = (sizeof(size_t)==4) ?
/* 32-bit */ (2 GB - 64 MB) :
/* 64-bit */ (size_t)(1ULL << ((sizeof(size_t)*8)-31));
/* *************************************
* console display
***************************************/
#define DISPLAY(...) { fprintf(stderr, __VA_ARGS__); fflush(NULL); }
#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); }
/* 0 : no display; 1: errors; 2 : + result + interaction + warnings; 3 : + progression; 4 : + information */
/* *************************************
* Exceptions
***************************************/
#ifndef DEBUG
# define DEBUG 0
#endif
#define DEBUGOUTPUT(...) { if (DEBUG) DISPLAY(__VA_ARGS__); }
#define RETURN_ERROR_INT(errorNum, ...) { \
DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__); \
DISPLAYLEVEL(1, "Error %i : ", errorNum); \
DISPLAYLEVEL(1, __VA_ARGS__); \
DISPLAYLEVEL(1, " \n"); \
return errorNum; \
}
#define CHECK_Z(zf) { \
size_t const zerr = zf; \
if (ZSTD_isError(zerr)) { \
DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__); \
DISPLAY("Error : "); \
DISPLAY("%s failed : %s", \
#zf, ZSTD_getErrorName(zerr)); \
DISPLAY(" \n"); \
exit(1); \
} \
}
#define RETURN_ERROR(errorNum, retType, ...) { \
retType r; \
memset(&r, 0, sizeof(retType)); \
DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__); \
DISPLAYLEVEL(1, "Error %i : ", errorNum); \
DISPLAYLEVEL(1, __VA_ARGS__); \
DISPLAYLEVEL(1, " \n"); \
r.tag = errorNum; \
return r; \
}
/* *************************************
* Benchmark Parameters
***************************************/
BMK_advancedParams_t BMK_initAdvancedParams(void) {
BMK_advancedParams_t const res = {
BMK_both, /* mode */
BMK_TIMETEST_DEFAULT_S, /* nbSeconds */
0, /* blockSize */
0, /* nbWorkers */
0, /* realTime */
0, /* additionalParam */
0, /* ldmFlag */
0, /* ldmMinMatch */
0, /* ldmHashLog */
0, /* ldmBucketSizeLog */
0, /* ldmHashRateLog */
ZSTD_lcm_auto, /* literalCompressionMode */
0 /* useRowMatchFinder */
};
return res;
}
/* ********************************************************
* Bench functions
**********************************************************/
typedef struct {
const void* srcPtr;
size_t srcSize;
void* cPtr;
size_t cRoom;
size_t cSize;
void* resPtr;
size_t resSize;
} blockParam_t;
#undef MIN
#undef MAX
#define MIN(a,b) ((a) < (b) ? (a) : (b))
#define MAX(a,b) ((a) > (b) ? (a) : (b))
static void
BMK_initCCtx(ZSTD_CCtx* ctx,
const void* dictBuffer, size_t dictBufferSize,
int cLevel,
const ZSTD_compressionParameters* comprParams,
const BMK_advancedParams_t* adv)
{
ZSTD_CCtx_reset(ctx, ZSTD_reset_session_and_parameters);
if (adv->nbWorkers==1) {
CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_nbWorkers, 0));
} else {
CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_nbWorkers, adv->nbWorkers));
}
CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_compressionLevel, cLevel));
CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_useRowMatchFinder, adv->useRowMatchFinder));
CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_enableLongDistanceMatching, adv->ldmFlag));
CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_ldmMinMatch, adv->ldmMinMatch));
CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_ldmHashLog, adv->ldmHashLog));
CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_ldmBucketSizeLog, adv->ldmBucketSizeLog));
CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_ldmHashRateLog, adv->ldmHashRateLog));
CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_windowLog, (int)comprParams->windowLog));
CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_hashLog, (int)comprParams->hashLog));
CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_chainLog, (int)comprParams->chainLog));
CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_searchLog, (int)comprParams->searchLog));
CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_minMatch, (int)comprParams->minMatch));
CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_targetLength, (int)comprParams->targetLength));
CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_literalCompressionMode, (int)adv->literalCompressionMode));
CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_strategy, comprParams->strategy));
CHECK_Z(ZSTD_CCtx_loadDictionary(ctx, dictBuffer, dictBufferSize));
}
static void BMK_initDCtx(ZSTD_DCtx* dctx,
const void* dictBuffer, size_t dictBufferSize) {
CHECK_Z(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters));
CHECK_Z(ZSTD_DCtx_loadDictionary(dctx, dictBuffer, dictBufferSize));
}
typedef struct {
ZSTD_CCtx* cctx;
const void* dictBuffer;
size_t dictBufferSize;
int cLevel;
const ZSTD_compressionParameters* comprParams;
const BMK_advancedParams_t* adv;
} BMK_initCCtxArgs;
static size_t local_initCCtx(void* payload) {
BMK_initCCtxArgs* ag = (BMK_initCCtxArgs*)payload;
BMK_initCCtx(ag->cctx, ag->dictBuffer, ag->dictBufferSize, ag->cLevel, ag->comprParams, ag->adv);
return 0;
}
typedef struct {
ZSTD_DCtx* dctx;
const void* dictBuffer;
size_t dictBufferSize;
} BMK_initDCtxArgs;
static size_t local_initDCtx(void* payload) {
BMK_initDCtxArgs* ag = (BMK_initDCtxArgs*)payload;
BMK_initDCtx(ag->dctx, ag->dictBuffer, ag->dictBufferSize);
return 0;
}
/* `addArgs` is the context */
static size_t local_defaultCompress(
const void* srcBuffer, size_t srcSize,
void* dstBuffer, size_t dstSize,
void* addArgs)
{
ZSTD_CCtx* const cctx = (ZSTD_CCtx*)addArgs;
return ZSTD_compress2(cctx, dstBuffer, dstSize, srcBuffer, srcSize);
}
/* `addArgs` is the context */
static size_t local_defaultDecompress(
const void* srcBuffer, size_t srcSize,
void* dstBuffer, size_t dstCapacity,
void* addArgs)
{
size_t moreToFlush = 1;
ZSTD_DCtx* const dctx = (ZSTD_DCtx*)addArgs;
ZSTD_inBuffer in;
ZSTD_outBuffer out;
in.src = srcBuffer; in.size = srcSize; in.pos = 0;
out.dst = dstBuffer; out.size = dstCapacity; out.pos = 0;
while (moreToFlush) {
if(out.pos == out.size) {
return (size_t)-ZSTD_error_dstSize_tooSmall;
}
moreToFlush = ZSTD_decompressStream(dctx, &out, &in);
if (ZSTD_isError(moreToFlush)) {
return moreToFlush;
}
}
return out.pos;
}
/* ================================================================= */
/* Benchmark Zstandard, mem-to-mem scenarios */
/* ================================================================= */
int BMK_isSuccessful_benchOutcome(BMK_benchOutcome_t outcome)
{
return outcome.tag == 0;
}
BMK_benchResult_t BMK_extract_benchResult(BMK_benchOutcome_t outcome)
{
assert(outcome.tag == 0);
return outcome.internal_never_use_directly;
}
static BMK_benchOutcome_t BMK_benchOutcome_error(void)
{
BMK_benchOutcome_t b;
memset(&b, 0, sizeof(b));
b.tag = 1;
return b;
}
static BMK_benchOutcome_t BMK_benchOutcome_setValidResult(BMK_benchResult_t result)
{
BMK_benchOutcome_t b;
b.tag = 0;
b.internal_never_use_directly = result;
return b;
}
/* benchMem with no allocation */
static BMK_benchOutcome_t
BMK_benchMemAdvancedNoAlloc(
const void** srcPtrs, size_t* srcSizes,
void** cPtrs, size_t* cCapacities, size_t* cSizes,
void** resPtrs, size_t* resSizes,
void** resultBufferPtr, void* compressedBuffer,
size_t maxCompressedSize,
BMK_timedFnState_t* timeStateCompress,
BMK_timedFnState_t* timeStateDecompress,
const void* srcBuffer, size_t srcSize,
const size_t* fileSizes, unsigned nbFiles,
const int cLevel,
const ZSTD_compressionParameters* comprParams,
const void* dictBuffer, size_t dictBufferSize,
ZSTD_CCtx* cctx, ZSTD_DCtx* dctx,
int displayLevel, const char* displayName,
const BMK_advancedParams_t* adv)
{
size_t const blockSize = ((adv->blockSize>=32 && (adv->mode != BMK_decodeOnly)) ? adv->blockSize : srcSize) + (!srcSize); /* avoid div by 0 */
BMK_benchResult_t benchResult;
size_t const loadedCompressedSize = srcSize;
size_t cSize = 0;
double ratio = 0.;
U32 nbBlocks;
assert(cctx != NULL); assert(dctx != NULL);
/* init */
memset(&benchResult, 0, sizeof(benchResult));
if (strlen(displayName)>17) displayName += strlen(displayName) - 17; /* display last 17 characters */
if (adv->mode == BMK_decodeOnly) { /* benchmark only decompression : source must be already compressed */
const char* srcPtr = (const char*)srcBuffer;
U64 totalDSize64 = 0;
U32 fileNb;
for (fileNb=0; fileNb<nbFiles; fileNb++) {
U64 const fSize64 = ZSTD_findDecompressedSize(srcPtr, fileSizes[fileNb]);
if (fSize64==0) RETURN_ERROR(32, BMK_benchOutcome_t, "Impossible to determine original size ");
totalDSize64 += fSize64;
srcPtr += fileSizes[fileNb];
}
{ size_t const decodedSize = (size_t)totalDSize64;
assert((U64)decodedSize == totalDSize64); /* check overflow */
free(*resultBufferPtr);
*resultBufferPtr = malloc(decodedSize);
if (!(*resultBufferPtr)) {
RETURN_ERROR(33, BMK_benchOutcome_t, "not enough memory");
}
if (totalDSize64 > decodedSize) { /* size_t overflow */
free(*resultBufferPtr);
RETURN_ERROR(32, BMK_benchOutcome_t, "original size is too large");
}
cSize = srcSize;
srcSize = decodedSize;
ratio = (double)srcSize / (double)cSize;
}
}
/* Init data blocks */
{ const char* srcPtr = (const char*)srcBuffer;
char* cPtr = (char*)compressedBuffer;
char* resPtr = (char*)(*resultBufferPtr);
U32 fileNb;
for (nbBlocks=0, fileNb=0; fileNb<nbFiles; fileNb++) {
size_t remaining = fileSizes[fileNb];
U32 const nbBlocksforThisFile = (adv->mode == BMK_decodeOnly) ? 1 : (U32)((remaining + (blockSize-1)) / blockSize);
U32 const blockEnd = nbBlocks + nbBlocksforThisFile;
for ( ; nbBlocks<blockEnd; nbBlocks++) {
size_t const thisBlockSize = MIN(remaining, blockSize);
srcPtrs[nbBlocks] = srcPtr;
srcSizes[nbBlocks] = thisBlockSize;
cPtrs[nbBlocks] = cPtr;
cCapacities[nbBlocks] = (adv->mode == BMK_decodeOnly) ? thisBlockSize : ZSTD_compressBound(thisBlockSize);
resPtrs[nbBlocks] = resPtr;
resSizes[nbBlocks] = (adv->mode == BMK_decodeOnly) ? (size_t) ZSTD_findDecompressedSize(srcPtr, thisBlockSize) : thisBlockSize;
srcPtr += thisBlockSize;
cPtr += cCapacities[nbBlocks];
resPtr += thisBlockSize;
remaining -= thisBlockSize;
if (adv->mode == BMK_decodeOnly) {
cSizes[nbBlocks] = thisBlockSize;
benchResult.cSize = thisBlockSize;
}
}
}
}
/* warming up `compressedBuffer` */
if (adv->mode == BMK_decodeOnly) {
memcpy(compressedBuffer, srcBuffer, loadedCompressedSize);
} else {
RDG_genBuffer(compressedBuffer, maxCompressedSize, 0.10, 0.50, 1);
}
/* Bench */
{ U64 const crcOrig = (adv->mode == BMK_decodeOnly) ? 0 : XXH64(srcBuffer, srcSize, 0);
# define NB_MARKS 4
const char* marks[NB_MARKS] = { " |", " /", " =", " \\" };
U32 markNb = 0;
int compressionCompleted = (adv->mode == BMK_decodeOnly);
int decompressionCompleted = (adv->mode == BMK_compressOnly);
BMK_benchParams_t cbp, dbp;
BMK_initCCtxArgs cctxprep;
BMK_initDCtxArgs dctxprep;
cbp.benchFn = local_defaultCompress; /* ZSTD_compress2 */
cbp.benchPayload = cctx;
cbp.initFn = local_initCCtx; /* BMK_initCCtx */
cbp.initPayload = &cctxprep;
cbp.errorFn = ZSTD_isError;
cbp.blockCount = nbBlocks;
cbp.srcBuffers = srcPtrs;
cbp.srcSizes = srcSizes;
cbp.dstBuffers = cPtrs;
cbp.dstCapacities = cCapacities;
cbp.blockResults = cSizes;
cctxprep.cctx = cctx;
cctxprep.dictBuffer = dictBuffer;
cctxprep.dictBufferSize = dictBufferSize;
cctxprep.cLevel = cLevel;
cctxprep.comprParams = comprParams;
cctxprep.adv = adv;
dbp.benchFn = local_defaultDecompress;
dbp.benchPayload = dctx;
dbp.initFn = local_initDCtx;
dbp.initPayload = &dctxprep;
dbp.errorFn = ZSTD_isError;
dbp.blockCount = nbBlocks;
dbp.srcBuffers = (const void* const *) cPtrs;
dbp.srcSizes = cSizes;
dbp.dstBuffers = resPtrs;
dbp.dstCapacities = resSizes;
dbp.blockResults = NULL;
dctxprep.dctx = dctx;
dctxprep.dictBuffer = dictBuffer;
dctxprep.dictBufferSize = dictBufferSize;
DISPLAYLEVEL(2, "\r%70s\r", ""); /* blank line */
DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->\r", marks[markNb], displayName, (unsigned)srcSize);
while (!(compressionCompleted && decompressionCompleted)) {
if (!compressionCompleted) {
BMK_runOutcome_t const cOutcome = BMK_benchTimedFn( timeStateCompress, cbp);
if (!BMK_isSuccessful_runOutcome(cOutcome)) {
return BMK_benchOutcome_error();
}
{ BMK_runTime_t const cResult = BMK_extract_runTime(cOutcome);
cSize = cResult.sumOfReturn;
ratio = (double)srcSize / cSize;
{ BMK_benchResult_t newResult;
newResult.cSpeed = (U64)((double)srcSize * TIMELOOP_NANOSEC / cResult.nanoSecPerRun);
benchResult.cSize = cSize;
if (newResult.cSpeed > benchResult.cSpeed)
benchResult.cSpeed = newResult.cSpeed;
} }
{ int const ratioAccuracy = (ratio < 10.) ? 3 : 2;
DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s\r",
marks[markNb], displayName,
(unsigned)srcSize, (unsigned)cSize,
ratioAccuracy, ratio,
benchResult.cSpeed < (10 MB) ? 2 : 1, (double)benchResult.cSpeed / MB_UNIT);
}
compressionCompleted = BMK_isCompleted_TimedFn(timeStateCompress);
}
if(!decompressionCompleted) {
BMK_runOutcome_t const dOutcome = BMK_benchTimedFn(timeStateDecompress, dbp);
if(!BMK_isSuccessful_runOutcome(dOutcome)) {
return BMK_benchOutcome_error();
}
{ BMK_runTime_t const dResult = BMK_extract_runTime(dOutcome);
U64 const newDSpeed = (U64)((double)srcSize * TIMELOOP_NANOSEC / dResult.nanoSecPerRun);
if (newDSpeed > benchResult.dSpeed)
benchResult.dSpeed = newDSpeed;
}
{ int const ratioAccuracy = (ratio < 10.) ? 3 : 2;
DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s ,%6.1f MB/s \r",
marks[markNb], displayName,
(unsigned)srcSize, (unsigned)cSize,
ratioAccuracy, ratio,
benchResult.cSpeed < (10 MB) ? 2 : 1, (double)benchResult.cSpeed / MB_UNIT,
(double)benchResult.dSpeed / MB_UNIT);
}
decompressionCompleted = BMK_isCompleted_TimedFn(timeStateDecompress);
}
markNb = (markNb+1) % NB_MARKS;
} /* while (!(compressionCompleted && decompressionCompleted)) */
/* CRC Checking */
{ const BYTE* resultBuffer = (const BYTE*)(*resultBufferPtr);
U64 const crcCheck = XXH64(resultBuffer, srcSize, 0);
if ((adv->mode == BMK_both) && (crcOrig!=crcCheck)) {
size_t u;
DISPLAY("!!! WARNING !!! %14s : Invalid Checksum : %x != %x \n",
displayName, (unsigned)crcOrig, (unsigned)crcCheck);
for (u=0; u<srcSize; u++) {
if (((const BYTE*)srcBuffer)[u] != resultBuffer[u]) {
unsigned segNb, bNb, pos;
size_t bacc = 0;
DISPLAY("Decoding error at pos %u ", (unsigned)u);
for (segNb = 0; segNb < nbBlocks; segNb++) {
if (bacc + srcSizes[segNb] > u) break;
bacc += srcSizes[segNb];
}
pos = (U32)(u - bacc);
bNb = pos / (128 KB);
DISPLAY("(sample %u, block %u, pos %u) \n", segNb, bNb, pos);
{ size_t const lowest = (u>5) ? 5 : u;
size_t n;
DISPLAY("origin: ");
for (n=lowest; n>0; n--)
DISPLAY("%02X ", ((const BYTE*)srcBuffer)[u-n]);
DISPLAY(" :%02X: ", ((const BYTE*)srcBuffer)[u]);
for (n=1; n<3; n++)
DISPLAY("%02X ", ((const BYTE*)srcBuffer)[u+n]);
DISPLAY(" \n");
DISPLAY("decode: ");
for (n=lowest; n>0; n--)
DISPLAY("%02X ", resultBuffer[u-n]);
DISPLAY(" :%02X: ", resultBuffer[u]);
for (n=1; n<3; n++)
DISPLAY("%02X ", resultBuffer[u+n]);
DISPLAY(" \n");
}
break;
}
if (u==srcSize-1) { /* should never happen */
DISPLAY("no difference detected\n");
}
} /* for (u=0; u<srcSize; u++) */
} /* if ((adv->mode == BMK_both) && (crcOrig!=crcCheck)) */
} /* CRC Checking */
if (displayLevel == 1) { /* hidden display mode -q, used by python speed benchmark */
double const cSpeed = (double)benchResult.cSpeed / MB_UNIT;
double const dSpeed = (double)benchResult.dSpeed / MB_UNIT;
if (adv->additionalParam) {
DISPLAY("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s %s (param=%d)\n", cLevel, (int)cSize, ratio, cSpeed, dSpeed, displayName, adv->additionalParam);
} else {
DISPLAY("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s %s\n", cLevel, (int)cSize, ratio, cSpeed, dSpeed, displayName);
}
}
DISPLAYLEVEL(2, "%2i#\n", cLevel);
} /* Bench */
benchResult.cMem = (1ULL << (comprParams->windowLog)) + ZSTD_sizeof_CCtx(cctx);
return BMK_benchOutcome_setValidResult(benchResult);
}
BMK_benchOutcome_t BMK_benchMemAdvanced(const void* srcBuffer, size_t srcSize,
void* dstBuffer, size_t dstCapacity,
const size_t* fileSizes, unsigned nbFiles,
int cLevel, const ZSTD_compressionParameters* comprParams,
const void* dictBuffer, size_t dictBufferSize,
int displayLevel, const char* displayName, const BMK_advancedParams_t* adv)
{
int const dstParamsError = !dstBuffer ^ !dstCapacity; /* must be both NULL or none */
size_t const blockSize = ((adv->blockSize>=32 && (adv->mode != BMK_decodeOnly)) ? adv->blockSize : srcSize) + (!srcSize) /* avoid div by 0 */ ;
U32 const maxNbBlocks = (U32) ((srcSize + (blockSize-1)) / blockSize) + nbFiles;
/* these are the blockTable parameters, just split up */
const void ** const srcPtrs = (const void**)malloc(maxNbBlocks * sizeof(void*));
size_t* const srcSizes = (size_t*)malloc(maxNbBlocks * sizeof(size_t));
void ** const cPtrs = (void**)malloc(maxNbBlocks * sizeof(void*));
size_t* const cSizes = (size_t*)malloc(maxNbBlocks * sizeof(size_t));
size_t* const cCapacities = (size_t*)malloc(maxNbBlocks * sizeof(size_t));
void ** const resPtrs = (void**)malloc(maxNbBlocks * sizeof(void*));
size_t* const resSizes = (size_t*)malloc(maxNbBlocks * sizeof(size_t));
BMK_timedFnState_t* timeStateCompress = BMK_createTimedFnState(adv->nbSeconds * 1000, BMK_RUNTEST_DEFAULT_MS);
BMK_timedFnState_t* timeStateDecompress = BMK_createTimedFnState(adv->nbSeconds * 1000, BMK_RUNTEST_DEFAULT_MS);
ZSTD_CCtx* const cctx = ZSTD_createCCtx();
ZSTD_DCtx* const dctx = ZSTD_createDCtx();
const size_t maxCompressedSize = dstCapacity ? dstCapacity : ZSTD_compressBound(srcSize) + (maxNbBlocks * 1024);
void* const internalDstBuffer = dstBuffer ? NULL : malloc(maxCompressedSize);
void* const compressedBuffer = dstBuffer ? dstBuffer : internalDstBuffer;
BMK_benchOutcome_t outcome = BMK_benchOutcome_error(); /* error by default */
void* resultBuffer = srcSize ? malloc(srcSize) : NULL;
int allocationincomplete = !srcPtrs || !srcSizes || !cPtrs ||
!cSizes || !cCapacities || !resPtrs || !resSizes ||
!timeStateCompress || !timeStateDecompress ||
!cctx || !dctx ||
!compressedBuffer || !resultBuffer;
if (!allocationincomplete && !dstParamsError) {
outcome = BMK_benchMemAdvancedNoAlloc(srcPtrs, srcSizes,
cPtrs, cCapacities, cSizes,
resPtrs, resSizes,
&resultBuffer,
compressedBuffer, maxCompressedSize,
timeStateCompress, timeStateDecompress,
srcBuffer, srcSize,
fileSizes, nbFiles,
cLevel, comprParams,
dictBuffer, dictBufferSize,
cctx, dctx,
displayLevel, displayName, adv);
}
/* clean up */
BMK_freeTimedFnState(timeStateCompress);
BMK_freeTimedFnState(timeStateDecompress);
ZSTD_freeCCtx(cctx);
ZSTD_freeDCtx(dctx);
free(internalDstBuffer);
free(resultBuffer);
free((void*)srcPtrs);
free(srcSizes);
free(cPtrs);
free(cSizes);
free(cCapacities);
free(resPtrs);
free(resSizes);
if(allocationincomplete) {
RETURN_ERROR(31, BMK_benchOutcome_t, "allocation error : not enough memory");
}
if(dstParamsError) {
RETURN_ERROR(32, BMK_benchOutcome_t, "Dst parameters not coherent");
}
return outcome;
}
BMK_benchOutcome_t BMK_benchMem(const void* srcBuffer, size_t srcSize,
const size_t* fileSizes, unsigned nbFiles,
int cLevel, const ZSTD_compressionParameters* comprParams,
const void* dictBuffer, size_t dictBufferSize,
int displayLevel, const char* displayName) {
BMK_advancedParams_t const adv = BMK_initAdvancedParams();
return BMK_benchMemAdvanced(srcBuffer, srcSize,
NULL, 0,
fileSizes, nbFiles,
cLevel, comprParams,
dictBuffer, dictBufferSize,
displayLevel, displayName, &adv);
}
static BMK_benchOutcome_t BMK_benchCLevel(const void* srcBuffer, size_t benchedSize,
const size_t* fileSizes, unsigned nbFiles,
int cLevel, const ZSTD_compressionParameters* comprParams,
const void* dictBuffer, size_t dictBufferSize,
int displayLevel, const char* displayName,
BMK_advancedParams_t const * const adv)
{
const char* pch = strrchr(displayName, '\\'); /* Windows */
if (!pch) pch = strrchr(displayName, '/'); /* Linux */
if (pch) displayName = pch+1;
if (adv->realTime) {
DISPLAYLEVEL(2, "Note : switching to real-time priority \n");
SET_REALTIME_PRIORITY;
}
if (displayLevel == 1 && !adv->additionalParam) /* --quiet mode */
DISPLAY("bench %s %s: input %u bytes, %u seconds, %u KB blocks\n",
ZSTD_VERSION_STRING, ZSTD_GIT_COMMIT_STRING,
(unsigned)benchedSize, adv->nbSeconds, (unsigned)(adv->blockSize>>10));
return BMK_benchMemAdvanced(srcBuffer, benchedSize,
NULL, 0,
fileSizes, nbFiles,
cLevel, comprParams,
dictBuffer, dictBufferSize,
displayLevel, displayName, adv);
}
BMK_benchOutcome_t BMK_syntheticTest(int cLevel, double compressibility,
const ZSTD_compressionParameters* compressionParams,
int displayLevel, const BMK_advancedParams_t* adv)
{
char name[20] = {0};
size_t const benchedSize = 10000000;
void* srcBuffer;
BMK_benchOutcome_t res;
if (cLevel > ZSTD_maxCLevel()) {
RETURN_ERROR(15, BMK_benchOutcome_t, "Invalid Compression Level");
}
/* Memory allocation */
srcBuffer = malloc(benchedSize);
if (!srcBuffer) RETURN_ERROR(21, BMK_benchOutcome_t, "not enough memory");
/* Fill input buffer */
RDG_genBuffer(srcBuffer, benchedSize, compressibility, 0.0, 0);
/* Bench */
snprintf (name, sizeof(name), "Synthetic %2u%%", (unsigned)(compressibility*100));
res = BMK_benchCLevel(srcBuffer, benchedSize,
&benchedSize /* ? */, 1 /* ? */,
cLevel, compressionParams,
NULL, 0, /* dictionary */
displayLevel, name, adv);
/* clean up */
free(srcBuffer);
return res;
}
static size_t BMK_findMaxMem(U64 requiredMem)
{
size_t const step = 64 MB;
BYTE* testmem = NULL;
requiredMem = (((requiredMem >> 26) + 1) << 26);
requiredMem += step;
if (requiredMem > maxMemory) requiredMem = maxMemory;
do {
testmem = (BYTE*)malloc((size_t)requiredMem);
requiredMem -= step;
} while (!testmem && requiredMem > 0);
free(testmem);
return (size_t)(requiredMem);
}
/*! BMK_loadFiles() :
* Loads `buffer` with content of files listed within `fileNamesTable`.
* At most, fills `buffer` entirely. */
static int BMK_loadFiles(void* buffer, size_t bufferSize,
size_t* fileSizes,
const char* const * fileNamesTable, unsigned nbFiles,
int displayLevel)
{
size_t pos = 0, totalSize = 0;
unsigned n;
for (n=0; n<nbFiles; n++) {
U64 fileSize = UTIL_getFileSize(fileNamesTable[n]); /* last file may be shortened */
if (UTIL_isDirectory(fileNamesTable[n])) {
DISPLAYLEVEL(2, "Ignoring %s directory... \n", fileNamesTable[n]);
fileSizes[n] = 0;
continue;
}
if (fileSize == UTIL_FILESIZE_UNKNOWN) {
DISPLAYLEVEL(2, "Cannot evaluate size of %s, ignoring ... \n", fileNamesTable[n]);
fileSizes[n] = 0;
continue;
}
{ FILE* const f = fopen(fileNamesTable[n], "rb");
if (f==NULL) RETURN_ERROR_INT(10, "impossible to open file %s", fileNamesTable[n]);
DISPLAYLEVEL(2, "Loading %s... \r", fileNamesTable[n]);
if (fileSize > bufferSize-pos) fileSize = bufferSize-pos, nbFiles=n; /* buffer too small - stop after this file */
{ size_t const readSize = fread(((char*)buffer)+pos, 1, (size_t)fileSize, f);
if (readSize != (size_t)fileSize) RETURN_ERROR_INT(11, "could not read %s", fileNamesTable[n]);
pos += readSize;
}
fileSizes[n] = (size_t)fileSize;
totalSize += (size_t)fileSize;
fclose(f);
} }
if (totalSize == 0) RETURN_ERROR_INT(12, "no data to bench");
return 0;
}
BMK_benchOutcome_t BMK_benchFilesAdvanced(
const char* const * fileNamesTable, unsigned nbFiles,
const char* dictFileName, int cLevel,
const ZSTD_compressionParameters* compressionParams,
int displayLevel, const BMK_advancedParams_t* adv)
{
void* srcBuffer = NULL;
size_t benchedSize;
void* dictBuffer = NULL;
size_t dictBufferSize = 0;
size_t* fileSizes = NULL;
BMK_benchOutcome_t res;
U64 const totalSizeToLoad = UTIL_getTotalFileSize(fileNamesTable, nbFiles);
if (!nbFiles) {
RETURN_ERROR(14, BMK_benchOutcome_t, "No Files to Benchmark");
}
if (cLevel > ZSTD_maxCLevel()) {
RETURN_ERROR(15, BMK_benchOutcome_t, "Invalid Compression Level");
}
fileSizes = (size_t*)calloc(nbFiles, sizeof(size_t));
if (!fileSizes) RETURN_ERROR(12, BMK_benchOutcome_t, "not enough memory for fileSizes");
/* Load dictionary */
if (dictFileName != NULL) {
U64 const dictFileSize = UTIL_getFileSize(dictFileName);
if (dictFileSize == UTIL_FILESIZE_UNKNOWN) {
DISPLAYLEVEL(1, "error loading %s : %s \n", dictFileName, strerror(errno));
free(fileSizes);
RETURN_ERROR(9, BMK_benchOutcome_t, "benchmark aborted");
}
if (dictFileSize > 64 MB) {
free(fileSizes);
RETURN_ERROR(10, BMK_benchOutcome_t, "dictionary file %s too large", dictFileName);
}
dictBufferSize = (size_t)dictFileSize;
dictBuffer = malloc(dictBufferSize);
if (dictBuffer==NULL) {
free(fileSizes);
RETURN_ERROR(11, BMK_benchOutcome_t, "not enough memory for dictionary (%u bytes)",
(unsigned)dictBufferSize);
}
{ int const errorCode = BMK_loadFiles(dictBuffer, dictBufferSize,
fileSizes, &dictFileName /*?*/,
1 /*?*/, displayLevel);
if (errorCode) {
res = BMK_benchOutcome_error();
goto _cleanUp;
} }
}
/* Memory allocation & restrictions */
benchedSize = BMK_findMaxMem(totalSizeToLoad * 3) / 3;
if ((U64)benchedSize > totalSizeToLoad) benchedSize = (size_t)totalSizeToLoad;
if (benchedSize < totalSizeToLoad)
DISPLAY("Not enough memory; testing %u MB only...\n", (unsigned)(benchedSize >> 20));
srcBuffer = benchedSize ? malloc(benchedSize) : NULL;
if (!srcBuffer) {
free(dictBuffer);
free(fileSizes);
RETURN_ERROR(12, BMK_benchOutcome_t, "not enough memory");
}
/* Load input buffer */
{ int const errorCode = BMK_loadFiles(srcBuffer, benchedSize,
fileSizes, fileNamesTable, nbFiles,
displayLevel);
if (errorCode) {
res = BMK_benchOutcome_error();
goto _cleanUp;
} }
/* Bench */
{ char mfName[20] = {0};
snprintf (mfName, sizeof(mfName), " %u files", nbFiles);
{ const char* const displayName = (nbFiles > 1) ? mfName : fileNamesTable[0];
res = BMK_benchCLevel(srcBuffer, benchedSize,
fileSizes, nbFiles,
cLevel, compressionParams,
dictBuffer, dictBufferSize,
displayLevel, displayName,
adv);
} }
_cleanUp:
free(srcBuffer);
free(dictBuffer);
free(fileSizes);
return res;
}
BMK_benchOutcome_t BMK_benchFiles(
const char* const * fileNamesTable, unsigned nbFiles,
const char* dictFileName,
int cLevel, const ZSTD_compressionParameters* compressionParams,
int displayLevel)
{
BMK_advancedParams_t const adv = BMK_initAdvancedParams();
return BMK_benchFilesAdvanced(fileNamesTable, nbFiles, dictFileName, cLevel, compressionParams, displayLevel, &adv);
}

213
dependencies/zstd-1.5.0/programs/benchzstd.h vendored Normal file
View file

@ -0,0 +1,213 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
/* benchzstd :
* benchmark Zstandard compression / decompression
* over a set of files or buffers
* and display progress result and final summary
*/
#if defined (__cplusplus)
extern "C" {
#endif
#ifndef BENCH_ZSTD_H_3242387
#define BENCH_ZSTD_H_3242387
/* === Dependencies === */
#include <stddef.h> /* size_t */
#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_compressionParameters */
#include "../lib/zstd.h" /* ZSTD_compressionParameters */
/* === Constants === */
#define MB_UNIT 1000000
/* === Benchmark functions === */
/* Creates a variant `typeName`, able to express "error or valid result".
* Functions with return type `typeName`
* must first check if result is valid, using BMK_isSuccessful_*(),
* and only then can extract `baseType`.
*/
#define VARIANT_ERROR_RESULT(baseType, variantName) \
\
typedef struct { \
baseType internal_never_use_directly; \
int tag; \
} variantName
typedef struct {
size_t cSize;
unsigned long long cSpeed; /* bytes / sec */
unsigned long long dSpeed;
size_t cMem; /* memory usage during compression */
} BMK_benchResult_t;
VARIANT_ERROR_RESULT(BMK_benchResult_t, BMK_benchOutcome_t);
/* check first if the return structure represents an error or a valid result */
int BMK_isSuccessful_benchOutcome(BMK_benchOutcome_t outcome);
/* extract result from variant type.
* note : this function will abort() program execution if result is not valid
* check result validity first, by using BMK_isSuccessful_benchOutcome()
*/
BMK_benchResult_t BMK_extract_benchResult(BMK_benchOutcome_t outcome);
/*! BMK_benchFiles() -- called by zstdcli */
/* Loads files from fileNamesTable into memory,
* and an optional dictionary from dictFileName (can be NULL),
* then uses benchMem().
* fileNamesTable - name of files to benchmark.
* nbFiles - number of files (size of fileNamesTable), must be > 0.
* dictFileName - name of dictionary file to load.
* cLevel - compression level to benchmark, errors if invalid.
* compressionParams - advanced compression Parameters.
* displayLevel - what gets printed:
* 0 : no display;
* 1 : errors;
* 2 : + result + interaction + warnings;
* 3 : + information;
* 4 : + debug
* @return:
* a variant, which expresses either an error, or a valid result.
* Use BMK_isSuccessful_benchOutcome() to check if function was successful.
* If yes, extract the valid result with BMK_extract_benchResult(),
* it will contain :
* .cSpeed: compression speed in bytes per second,
* .dSpeed: decompression speed in bytes per second,
* .cSize : compressed size, in bytes
* .cMem : memory budget required for the compression context
*/
BMK_benchOutcome_t BMK_benchFiles(
const char* const * fileNamesTable, unsigned nbFiles,
const char* dictFileName,
int cLevel, const ZSTD_compressionParameters* compressionParams,
int displayLevel);
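/* Usage sketch for BMK_benchFiles() (illustrative only; assumes <stdio.h> and <string.h>
 * are available, and that a zero-initialized ZSTD_compressionParameters struct is an
 * acceptable way to request level defaults, since 0 means "default" for each field).
 * It demonstrates the mandatory check-then-extract pattern on the returned variant :
 *
 *     static void example_benchFiles(const char** files, unsigned nbFiles)
 *     {
 *         ZSTD_compressionParameters cParams;
 *         BMK_benchOutcome_t outcome;
 *         memset(&cParams, 0, sizeof(cParams));
 *         outcome = BMK_benchFiles(files, nbFiles, NULL, 3, &cParams, 2);
 *         if (!BMK_isSuccessful_benchOutcome(outcome)) return;
 *         {   BMK_benchResult_t const r = BMK_extract_benchResult(outcome);
 *             printf("compressed size : %u bytes, speed : %.1f MB/s \n",
 *                    (unsigned)r.cSize, (double)r.cSpeed / MB_UNIT);
 *         }
 *     }
 */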
typedef enum {
BMK_both = 0,
BMK_decodeOnly = 1,
BMK_compressOnly = 2
} BMK_mode_t;
typedef struct {
BMK_mode_t mode; /* 0: all, 1: decode only, 2: compress only */
unsigned nbSeconds; /* default timing is in nbSeconds */
size_t blockSize; /* Maximum size of each block */
int nbWorkers; /* multithreading */
unsigned realTime; /* real time priority */
int additionalParam; /* used by python speed benchmark */
int ldmFlag; /* enables long distance matching */
int ldmMinMatch; /* below: parameters for long distance matching, see zstd.1.md */
int ldmHashLog;
int ldmBucketSizeLog;
int ldmHashRateLog;
ZSTD_literalCompressionMode_e literalCompressionMode;
int useRowMatchFinder; /* use row-based matchfinder if possible */
} BMK_advancedParams_t;
/* returns default parameters used by nonAdvanced functions */
BMK_advancedParams_t BMK_initAdvancedParams(void);
/*! BMK_benchFilesAdvanced():
* Same as BMK_benchFiles(),
* with more controls, provided through advancedParams_t structure */
BMK_benchOutcome_t BMK_benchFilesAdvanced(
const char* const * fileNamesTable, unsigned nbFiles,
const char* dictFileName,
int cLevel, const ZSTD_compressionParameters* compressionParams,
int displayLevel, const BMK_advancedParams_t* adv);
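/* Usage sketch for BMK_benchFilesAdvanced() (illustrative only) :
 * start from BMK_initAdvancedParams() so every field carries a valid default, then
 * override only what is needed; here measurements are restricted to compression and
 * run 5 seconds each. A zero-initialized ZSTD_compressionParameters struct again
 * stands for "use the defaults of the requested level".
 *
 *     BMK_advancedParams_t adv = BMK_initAdvancedParams();
 *     ZSTD_compressionParameters cParams;
 *     memset(&cParams, 0, sizeof(cParams));
 *     adv.mode = BMK_compressOnly;
 *     adv.nbSeconds = 5;
 *     {   BMK_benchOutcome_t const outcome =
 *             BMK_benchFilesAdvanced(files, nbFiles, NULL, 19, &cParams, 2, &adv);
 *         if (BMK_isSuccessful_benchOutcome(outcome)) {
 *             BMK_benchResult_t const r = BMK_extract_benchResult(outcome);
 *             (void)r;
 *         }
 *     }
 */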
/*! BMK_syntheticTest() -- called from zstdcli */
/* Generates a sample with datagen, using compressibility argument */
/* cLevel - compression level to benchmark, errors if invalid
* compressibility - determines compressibility of sample
* compressionParams - basic compression Parameters
* displayLevel - see benchFiles
* adv - see BMK_advancedParams_t
* @return:
* a variant, which expresses either an error, or a valid result.
* Use BMK_isSuccessful_benchOutcome() to check if function was successful.
* If yes, extract the valid result with BMK_extract_benchResult(),
* it will contain :
* .cSpeed: compression speed in bytes per second,
* .dSpeed: decompression speed in bytes per second,
* .cSize : compressed size, in bytes
* .cMem : memory budget required for the compression context
*/
BMK_benchOutcome_t BMK_syntheticTest(
int cLevel, double compressibility,
const ZSTD_compressionParameters* compressionParams,
int displayLevel, const BMK_advancedParams_t* adv);
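/* Usage sketch for BMK_syntheticTest() (illustrative only) :
 * benchmarks level 1 on a generated sample with compressibility parameter 0.50,
 * default advanced parameters, and a zero-initialized ZSTD_compressionParameters struct.
 *
 *     BMK_advancedParams_t const adv = BMK_initAdvancedParams();
 *     ZSTD_compressionParameters cParams;
 *     memset(&cParams, 0, sizeof(cParams));
 *     {   BMK_benchOutcome_t const outcome = BMK_syntheticTest(1, 0.50, &cParams, 2, &adv);
 *         if (BMK_isSuccessful_benchOutcome(outcome)) {
 *             BMK_benchResult_t const r = BMK_extract_benchResult(outcome);
 *             (void)r;
 *         }
 *     }
 */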
/* === Benchmark Zstandard in a memory-to-memory scenario === */
/** BMK_benchMem() -- core benchmarking function, called in paramgrill
* applies ZSTD_compress2() and ZSTD_decompressStream() on data in srcBuffer
* with the specific compression parameters provided by the other arguments
* (cLevel, comprParams, + adv in advanced mode) */
/* srcBuffer - data source, expected to be valid compressed data if in Decode Only Mode
* srcSize - size of data in srcBuffer
* fileSizes - srcBuffer is considered cut into 1+ segments, to compress separately.
* note : sum(fileSizes) must be == srcSize. (<== ensure it's properly checked)
* nbFiles - nb of segments
* cLevel - compression level
* comprParams - basic compression parameters
* dictBuffer - a dictionary if used, null otherwise
* dictBufferSize - size of dictBuffer, 0 otherwise
* displayLevel - see BMK_benchFiles
* displayName - name used by display
* @return:
* a variant, which expresses either an error, or a valid result.
* Use BMK_isSuccessful_benchOutcome() to check if function was successful.
* If yes, extract the valid result with BMK_extract_benchResult(),
* it will contain :
* .cSpeed: compression speed in bytes per second,
* .dSpeed: decompression speed in bytes per second,
* .cSize : compressed size, in bytes
* .cMem : memory budget required for the compression context
*/
BMK_benchOutcome_t BMK_benchMem(const void* srcBuffer, size_t srcSize,
const size_t* fileSizes, unsigned nbFiles,
int cLevel, const ZSTD_compressionParameters* comprParams,
const void* dictBuffer, size_t dictBufferSize,
int displayLevel, const char* displayName);
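/* Usage sketch for BMK_benchMem() (illustrative only) :
 * benchmarks a single in-memory buffer, treated as one segment, without dictionary.
 * `src` and `srcSize` are assumed to be provided by the caller.
 *
 *     size_t fileSizes[1];
 *     ZSTD_compressionParameters cParams;
 *     fileSizes[0] = srcSize;
 *     memset(&cParams, 0, sizeof(cParams));
 *     {   BMK_benchOutcome_t const outcome =
 *             BMK_benchMem(src, srcSize, fileSizes, 1, 3, &cParams, NULL, 0, 2, "buffer");
 *         if (BMK_isSuccessful_benchOutcome(outcome)) {
 *             BMK_benchResult_t const r = BMK_extract_benchResult(outcome);
 *             (void)r;
 *         }
 *     }
 */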
/* BMK_benchMemAdvanced() : same as BMK_benchMem()
* with following additional options :
* dstBuffer - destination buffer to write compressed output in, NULL if none provided.
* dstCapacity - capacity of destination buffer, give 0 if dstBuffer = NULL
* adv = see advancedParams_t
*/
BMK_benchOutcome_t BMK_benchMemAdvanced(const void* srcBuffer, size_t srcSize,
void* dstBuffer, size_t dstCapacity,
const size_t* fileSizes, unsigned nbFiles,
int cLevel, const ZSTD_compressionParameters* comprParams,
const void* dictBuffer, size_t dictBufferSize,
int displayLevel, const char* displayName,
const BMK_advancedParams_t* adv);
#endif /* BENCH_ZSTD_H_3242387 */
#if defined (__cplusplus)
}
#endif

186
dependencies/zstd-1.5.0/programs/datagen.c vendored Normal file
View file

@ -0,0 +1,186 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
/*-************************************
* Dependencies
**************************************/
#include "datagen.h"
#include "platform.h" /* SET_BINARY_MODE */
#include <stdlib.h> /* malloc, free */
#include <stdio.h> /* FILE, fwrite, fprintf */
#include <string.h> /* memcpy */
#include "../lib/common/mem.h" /* U32 */
/*-************************************
* Macros
**************************************/
#define KB *(1 <<10)
#define MIN(a,b) ( (a) < (b) ? (a) : (b) )
#define RDG_DEBUG 0
#define TRACE(...) if (RDG_DEBUG) fprintf(stderr, __VA_ARGS__ )
/*-************************************
* Local constants
**************************************/
#define LTLOG 13
#define LTSIZE (1<<LTLOG)
#define LTMASK (LTSIZE-1)
/*-*******************************************************
* Local Functions
*********************************************************/
#define RDG_rotl32(x,r) ((x << r) | (x >> (32 - r)))
static U32 RDG_rand(U32* src)
{
static const U32 prime1 = 2654435761U;
static const U32 prime2 = 2246822519U;
U32 rand32 = *src;
rand32 *= prime1;
rand32 ^= prime2;
rand32 = RDG_rotl32(rand32, 13);
*src = rand32;
return rand32 >> 5;
}
typedef U32 fixedPoint_24_8;
static void RDG_fillLiteralDistrib(BYTE* ldt, fixedPoint_24_8 ld)
{
BYTE const firstChar = (ld<=0.0) ? 0 : '(';
BYTE const lastChar = (ld<=0.0) ? 255 : '}';
BYTE character = (ld<=0.0) ? 0 : '0';
U32 u;
if (ld<=0) ld = 0;
for (u=0; u<LTSIZE; ) {
U32 const weight = (((LTSIZE - u) * ld) >> 8) + 1;
U32 const end = MIN ( u + weight , LTSIZE);
while (u < end) ldt[u++] = character;
character++;
if (character > lastChar) character = firstChar;
}
}
static BYTE RDG_genChar(U32* seed, const BYTE* ldt)
{
U32 const id = RDG_rand(seed) & LTMASK;
return ldt[id]; /* memory-sanitizer fails here, stating "uninitialized value" when table initialized with P==0.0. Checked : table is fully initialized */
}
static U32 RDG_rand15Bits (U32* seedPtr)
{
return RDG_rand(seedPtr) & 0x7FFF;
}
static U32 RDG_randLength(U32* seedPtr)
{
if (RDG_rand(seedPtr) & 7) return (RDG_rand(seedPtr) & 0xF); /* small length */
return (RDG_rand(seedPtr) & 0x1FF) + 0xF;
}
static void RDG_genBlock(void* buffer, size_t buffSize, size_t prefixSize,
double matchProba, const BYTE* ldt, U32* seedPtr)
{
BYTE* const buffPtr = (BYTE*)buffer;
U32 const matchProba32 = (U32)(32768 * matchProba);
size_t pos = prefixSize;
U32 prevOffset = 1;
/* special case : sparse content */
while (matchProba >= 1.0) {
size_t size0 = RDG_rand(seedPtr) & 3;
size0 = (size_t)1 << (16 + size0 * 2);
size0 += RDG_rand(seedPtr) & (size0-1); /* because size0 is power of 2*/
if (buffSize < pos + size0) {
memset(buffPtr+pos, 0, buffSize-pos);
return;
}
memset(buffPtr+pos, 0, size0);
pos += size0;
buffPtr[pos-1] = RDG_genChar(seedPtr, ldt);
continue;
}
/* init */
if (pos==0) buffPtr[0] = RDG_genChar(seedPtr, ldt), pos=1;
/* Generate compressible data */
while (pos < buffSize) {
/* Select : Literal (char) or Match (within 32K) */
if (RDG_rand15Bits(seedPtr) < matchProba32) {
/* Copy (within 32K) */
U32 const length = RDG_randLength(seedPtr) + 4;
U32 const d = (U32) MIN(pos + length , buffSize);
U32 const repeatOffset = (RDG_rand(seedPtr) & 15) == 2;
U32 const randOffset = RDG_rand15Bits(seedPtr) + 1;
U32 const offset = repeatOffset ? prevOffset : (U32) MIN(randOffset , pos);
size_t match = pos - offset;
while (pos < d) { buffPtr[pos++] = buffPtr[match++]; /* correctly manages overlaps */ }
prevOffset = offset;
} else {
/* Literal (noise) */
U32 const length = RDG_randLength(seedPtr);
U32 const d = (U32) MIN(pos + length, buffSize);
while (pos < d) { buffPtr[pos++] = RDG_genChar(seedPtr, ldt); }
} }
}
void RDG_genBuffer(void* buffer, size_t size, double matchProba, double litProba, unsigned seed)
{
U32 seed32 = seed;
BYTE ldt[LTSIZE];
memset(ldt, '0', sizeof(ldt)); /* yes, character '0', this is intentional */
if (litProba<=0.0) litProba = matchProba / 4.5;
RDG_fillLiteralDistrib(ldt, (fixedPoint_24_8)(litProba * 256 + 0.001));
RDG_genBlock(buffer, size, 0, matchProba, ldt, &seed32);
}
void RDG_genStdout(unsigned long long size, double matchProba, double litProba, unsigned seed)
{
U32 seed32 = seed;
size_t const stdBlockSize = 128 KB;
size_t const stdDictSize = 32 KB;
BYTE* const buff = (BYTE*)malloc(stdDictSize + stdBlockSize);
U64 total = 0;
BYTE ldt[LTSIZE]; /* literals distribution table */
/* init */
if (buff==NULL) { perror("datagen"); exit(1); }
if (litProba<=0.0) litProba = matchProba / 4.5;
memset(ldt, '0', sizeof(ldt)); /* yes, character '0', this is intentional */
RDG_fillLiteralDistrib(ldt, (fixedPoint_24_8)(litProba * 256 + 0.001));
SET_BINARY_MODE(stdout);
/* Generate initial dict */
RDG_genBlock(buff, stdDictSize, 0, matchProba, ldt, &seed32);
/* Generate compressible data */
while (total < size) {
size_t const genBlockSize = (size_t) (MIN (stdBlockSize, size-total));
RDG_genBlock(buff, stdDictSize+stdBlockSize, stdDictSize, matchProba, ldt, &seed32);
total += genBlockSize;
{ size_t const unused = fwrite(buff, 1, genBlockSize, stdout); (void)unused; }
/* update dict */
memcpy(buff, buff + stdBlockSize, stdDictSize);
}
/* cleanup */
free(buff);
}

30
dependencies/zstd-1.5.0/programs/datagen.h vendored Normal file
View file

@ -0,0 +1,30 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef DATAGEN_H
#define DATAGEN_H
#include <stddef.h> /* size_t */
void RDG_genStdout(unsigned long long size, double matchProba, double litProba, unsigned seed);
void RDG_genBuffer(void* buffer, size_t size, double matchProba, double litProba, unsigned seed);
/*!RDG_genBuffer
Generate 'size' bytes of compressible data into 'buffer'.
Compressibility can be controlled using 'matchProba', which is a floating point value between 0 and 1.
'litProba' is optional; it affects the variability of individual bytes. If litProba==0.0, a default value is used.
The generated data pattern can be modified using a different 'seed'.
For a given triplet (matchProba, litProba, seed), the function always generates the same content.
RDG_genStdout
Same as RDG_genBuffer, but generates data into stdout
*/
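/* Usage sketch for RDG_genBuffer() (illustrative only; assumes <stdlib.h>) :
 * fills a 64 KB buffer with moderately compressible data (matchProba 0.5),
 * default literal variability, and a fixed seed so repeated runs produce identical content.
 *
 *     size_t const size = 64 * 1024;
 *     void* const buffer = malloc(size);
 *     if (buffer != NULL) {
 *         RDG_genBuffer(buffer, size, 0.5, 0.0, 0);
 *         ...  use buffer as reproducible test input  ...
 *         free(buffer);
 *     }
 */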
#endif

360
dependencies/zstd-1.5.0/programs/dibio.c vendored Normal file
View file

@ -0,0 +1,360 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
/* **************************************
* Compiler Warnings
****************************************/
#ifdef _MSC_VER
# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
#endif
/*-*************************************
* Includes
***************************************/
#include "platform.h" /* Large Files support */
#include "util.h" /* UTIL_getFileSize, UTIL_getTotalFileSize */
#include <stdlib.h> /* malloc, free */
#include <string.h> /* memset */
#include <stdio.h> /* fprintf, fopen, ftello64 */
#include <errno.h> /* errno */
#include <assert.h>
#include "timefn.h" /* UTIL_time_t, UTIL_clockSpanMicro, UTIL_getTime */
#include "../lib/common/mem.h" /* read */
#include "../lib/common/error_private.h"
#include "dibio.h"
/*-*************************************
* Constants
***************************************/
#define KB *(1 <<10)
#define MB *(1 <<20)
#define GB *(1U<<30)
#define SAMPLESIZE_MAX (128 KB)
#define MEMMULT 11 /* rough estimation : memory cost to analyze 1 byte of sample */
#define COVER_MEMMULT 9 /* rough estimation : memory cost to analyze 1 byte of sample */
#define FASTCOVER_MEMMULT 1 /* rough estimation : memory cost to analyze 1 byte of sample */
static const size_t g_maxMemory = (sizeof(size_t) == 4) ? (2 GB - 64 MB) : ((size_t)(512 MB) << sizeof(size_t));
#define NOISELENGTH 32
/*-*************************************
* Console display
***************************************/
#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); }
static const U64 g_refreshRate = SEC_TO_MICRO / 6;
static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;
#define DISPLAYUPDATE(l, ...) { if (displayLevel>=l) { \
if ((UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) || (displayLevel>=4)) \
{ g_displayClock = UTIL_getTime(); DISPLAY(__VA_ARGS__); \
if (displayLevel>=4) fflush(stderr); } } }
/*-*************************************
* Exceptions
***************************************/
#ifndef DEBUG
# define DEBUG 0
#endif
#define DEBUGOUTPUT(...) if (DEBUG) DISPLAY(__VA_ARGS__);
#define EXM_THROW(error, ...) \
{ \
DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \
DISPLAY("Error %i : ", error); \
DISPLAY(__VA_ARGS__); \
DISPLAY("\n"); \
exit(error); \
}
/* ********************************************************
* Helper functions
**********************************************************/
#undef MIN
#define MIN(a,b) ((a) < (b) ? (a) : (b))
/* ********************************************************
* File related operations
**********************************************************/
/** DiB_loadFiles() :
* load samples from files listed in fileNamesTable into buffer.
* works even if buffer is too small to load all samples.
* Also provides the size of each sample into sampleSizes table
* which must be sized correctly, using DiB_fileStats().
* @return : nb of samples effectively loaded into `buffer`
* *bufferSizePtr is modified; it provides the amount of data loaded within buffer.
* sampleSizes is filled with the size of each sample.
*/
static unsigned DiB_loadFiles(void* buffer, size_t* bufferSizePtr,
size_t* sampleSizes, unsigned sstSize,
const char** fileNamesTable, unsigned nbFiles, size_t targetChunkSize,
unsigned displayLevel)
{
char* const buff = (char*)buffer;
size_t pos = 0;
unsigned nbLoadedChunks = 0, fileIndex;
for (fileIndex=0; fileIndex<nbFiles; fileIndex++) {
const char* const fileName = fileNamesTable[fileIndex];
unsigned long long const fs64 = UTIL_getFileSize(fileName);
unsigned long long remainingToLoad = (fs64 == UTIL_FILESIZE_UNKNOWN) ? 0 : fs64;
U32 const nbChunks = targetChunkSize ? (U32)((fs64 + (targetChunkSize-1)) / targetChunkSize) : 1;
U64 const chunkSize = targetChunkSize ? MIN(targetChunkSize, fs64) : fs64;
size_t const maxChunkSize = (size_t)MIN(chunkSize, SAMPLESIZE_MAX);
U32 cnb;
FILE* const f = fopen(fileName, "rb");
if (f==NULL) EXM_THROW(10, "zstd: dictBuilder: %s %s ", fileName, strerror(errno));
DISPLAYUPDATE(2, "Loading %s... \r", fileName);
for (cnb=0; cnb<nbChunks; cnb++) {
size_t const toLoad = (size_t)MIN(maxChunkSize, remainingToLoad);
if (toLoad > *bufferSizePtr-pos) break;
{ size_t const readSize = fread(buff+pos, 1, toLoad, f);
if (readSize != toLoad) EXM_THROW(11, "Pb reading %s", fileName);
pos += readSize;
sampleSizes[nbLoadedChunks++] = toLoad;
remainingToLoad -= targetChunkSize;
if (nbLoadedChunks == sstSize) { /* no more space left in sampleSizes table */
fileIndex = nbFiles; /* stop there */
break;
}
if (toLoad < targetChunkSize) {
fseek(f, (long)(targetChunkSize - toLoad), SEEK_CUR);
} } }
fclose(f);
}
DISPLAYLEVEL(2, "\r%79s\r", "");
*bufferSizePtr = pos;
DISPLAYLEVEL(4, "loaded : %u KB \n", (unsigned)(pos >> 10))
return nbLoadedChunks;
}
#define DiB_rotl32(x,r) ((x << r) | (x >> (32 - r)))
static U32 DiB_rand(U32* src)
{
static const U32 prime1 = 2654435761U;
static const U32 prime2 = 2246822519U;
U32 rand32 = *src;
rand32 *= prime1;
rand32 ^= prime2;
rand32 = DiB_rotl32(rand32, 13);
*src = rand32;
return rand32 >> 5;
}
/* DiB_shuffle() :
* shuffle a table of file names in a semi-random way
* It improves dictionary quality by reducing "locality" impact, so that when the sample set is very large,
* training uses random elements from it instead of just the first ones. */
static void DiB_shuffle(const char** fileNamesTable, unsigned nbFiles) {
U32 seed = 0xFD2FB528;
unsigned i;
assert(nbFiles >= 1);
for (i = nbFiles - 1; i > 0; --i) {
unsigned const j = DiB_rand(&seed) % (i + 1);
const char* const tmp = fileNamesTable[j];
fileNamesTable[j] = fileNamesTable[i];
fileNamesTable[i] = tmp;
}
}
/*-********************************************************
* Dictionary training functions
**********************************************************/
static size_t DiB_findMaxMem(unsigned long long requiredMem)
{
size_t const step = 8 MB;
void* testmem = NULL;
requiredMem = (((requiredMem >> 23) + 1) << 23);
requiredMem += step;
if (requiredMem > g_maxMemory) requiredMem = g_maxMemory;
while (!testmem) {
testmem = malloc((size_t)requiredMem);
requiredMem -= step;
}
free(testmem);
return (size_t)requiredMem;
}
static void DiB_fillNoise(void* buffer, size_t length)
{
unsigned const prime1 = 2654435761U;
unsigned const prime2 = 2246822519U;
unsigned acc = prime1;
size_t p=0;
for (p=0; p<length; p++) {
acc *= prime2;
((unsigned char*)buffer)[p] = (unsigned char)(acc >> 21);
}
}
static void DiB_saveDict(const char* dictFileName,
const void* buff, size_t buffSize)
{
FILE* const f = fopen(dictFileName, "wb");
if (f==NULL) EXM_THROW(3, "cannot open %s ", dictFileName);
{ size_t const n = fwrite(buff, 1, buffSize, f);
if (n!=buffSize) EXM_THROW(4, "%s : write error", dictFileName) }
{ size_t const n = (size_t)fclose(f);
if (n!=0) EXM_THROW(5, "%s : flush error", dictFileName) }
}
typedef struct {
U64 totalSizeToLoad;
unsigned oneSampleTooLarge;
unsigned nbSamples;
} fileStats;
/*! DiB_fileStats() :
* Given a list of files, and a chunkSize (0 == no chunk, whole files)
* provides the amount of data to be loaded and the resulting nb of samples.
* This is useful primarily for allocation purpose => sample buffer, and sample sizes table.
*/
static fileStats DiB_fileStats(const char** fileNamesTable, unsigned nbFiles, size_t chunkSize, unsigned displayLevel)
{
fileStats fs;
unsigned n;
memset(&fs, 0, sizeof(fs));
for (n=0; n<nbFiles; n++) {
U64 const fileSize = UTIL_getFileSize(fileNamesTable[n]);
U64 const srcSize = (fileSize == UTIL_FILESIZE_UNKNOWN) ? 0 : fileSize;
U32 const nbSamples = (U32)(chunkSize ? (srcSize + (chunkSize-1)) / chunkSize : 1);
U64 const chunkToLoad = chunkSize ? MIN(chunkSize, srcSize) : srcSize;
size_t const cappedChunkSize = (size_t)MIN(chunkToLoad, SAMPLESIZE_MAX);
fs.totalSizeToLoad += cappedChunkSize * nbSamples;
fs.oneSampleTooLarge |= (chunkSize > 2*SAMPLESIZE_MAX);
fs.nbSamples += nbSamples;
}
DISPLAYLEVEL(4, "Preparing to load : %u KB \n", (unsigned)(fs.totalSizeToLoad >> 10));
return fs;
}
int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize,
const char** fileNamesTable, unsigned nbFiles, size_t chunkSize,
ZDICT_legacy_params_t* params, ZDICT_cover_params_t* coverParams,
ZDICT_fastCover_params_t* fastCoverParams, int optimize)
{
unsigned const displayLevel = params ? params->zParams.notificationLevel :
coverParams ? coverParams->zParams.notificationLevel :
fastCoverParams ? fastCoverParams->zParams.notificationLevel :
0; /* should never happen */
void* const dictBuffer = malloc(maxDictSize);
fileStats const fs = DiB_fileStats(fileNamesTable, nbFiles, chunkSize, displayLevel);
size_t* const sampleSizes = (size_t*)malloc(fs.nbSamples * sizeof(size_t));
size_t const memMult = params ? MEMMULT :
coverParams ? COVER_MEMMULT:
FASTCOVER_MEMMULT;
size_t const maxMem = DiB_findMaxMem(fs.totalSizeToLoad * memMult) / memMult;
size_t loadedSize = (size_t) MIN ((unsigned long long)maxMem, fs.totalSizeToLoad);
void* const srcBuffer = malloc(loadedSize+NOISELENGTH);
int result = 0;
/* Checks */
if ((!sampleSizes) || (!srcBuffer) || (!dictBuffer))
EXM_THROW(12, "not enough memory for DiB_trainFiles"); /* should not happen */
if (fs.oneSampleTooLarge) {
DISPLAYLEVEL(2, "! Warning : some sample(s) are very large \n");
DISPLAYLEVEL(2, "! Note that dictionary is only useful for small samples. \n");
DISPLAYLEVEL(2, "! As a consequence, only the first %u bytes of each sample are loaded \n", SAMPLESIZE_MAX);
}
if (fs.nbSamples < 5) {
DISPLAYLEVEL(2, "! Warning : nb of samples too low for proper processing ! \n");
DISPLAYLEVEL(2, "! Please provide _one file per sample_. \n");
DISPLAYLEVEL(2, "! Alternatively, split files into fixed-size blocks representative of samples, with -B# \n");
EXM_THROW(14, "nb of samples too low"); /* we now clearly forbid this case */
}
if (fs.totalSizeToLoad < (unsigned long long)maxDictSize * 8) {
DISPLAYLEVEL(2, "! Warning : data size of samples too small for target dictionary size \n");
DISPLAYLEVEL(2, "! Samples should be about 100x larger than target dictionary size \n");
}
/* init */
if (loadedSize < fs.totalSizeToLoad)
DISPLAYLEVEL(1, "Not enough memory; training on %u MB only...\n", (unsigned)(loadedSize >> 20));
/* Load input buffer */
DISPLAYLEVEL(3, "Shuffling input files\n");
DiB_shuffle(fileNamesTable, nbFiles);
DiB_loadFiles(srcBuffer, &loadedSize, sampleSizes, fs.nbSamples, fileNamesTable, nbFiles, chunkSize, displayLevel);
{ size_t dictSize;
if (params) {
DiB_fillNoise((char*)srcBuffer + loadedSize, NOISELENGTH); /* guard band, for end of buffer condition */
dictSize = ZDICT_trainFromBuffer_legacy(dictBuffer, maxDictSize,
srcBuffer, sampleSizes, fs.nbSamples,
*params);
} else if (coverParams) {
if (optimize) {
dictSize = ZDICT_optimizeTrainFromBuffer_cover(dictBuffer, maxDictSize,
srcBuffer, sampleSizes, fs.nbSamples,
coverParams);
if (!ZDICT_isError(dictSize)) {
unsigned splitPercentage = (unsigned)(coverParams->splitPoint * 100);
DISPLAYLEVEL(2, "k=%u\nd=%u\nsteps=%u\nsplit=%u\n", coverParams->k, coverParams->d,
coverParams->steps, splitPercentage);
}
} else {
dictSize = ZDICT_trainFromBuffer_cover(dictBuffer, maxDictSize, srcBuffer,
sampleSizes, fs.nbSamples, *coverParams);
}
} else {
assert(fastCoverParams != NULL);
if (optimize) {
dictSize = ZDICT_optimizeTrainFromBuffer_fastCover(dictBuffer, maxDictSize,
srcBuffer, sampleSizes, fs.nbSamples,
fastCoverParams);
if (!ZDICT_isError(dictSize)) {
unsigned splitPercentage = (unsigned)(fastCoverParams->splitPoint * 100);
DISPLAYLEVEL(2, "k=%u\nd=%u\nf=%u\nsteps=%u\nsplit=%u\naccel=%u\n", fastCoverParams->k,
fastCoverParams->d, fastCoverParams->f, fastCoverParams->steps, splitPercentage,
fastCoverParams->accel);
}
} else {
dictSize = ZDICT_trainFromBuffer_fastCover(dictBuffer, maxDictSize, srcBuffer,
sampleSizes, fs.nbSamples, *fastCoverParams);
}
}
if (ZDICT_isError(dictSize)) {
DISPLAYLEVEL(1, "dictionary training failed : %s \n", ZDICT_getErrorName(dictSize)); /* should not happen */
result = 1;
goto _cleanup;
}
/* save dict */
DISPLAYLEVEL(2, "Save dictionary of size %u into file %s \n", (unsigned)dictSize, dictFileName);
DiB_saveDict(dictFileName, dictBuffer, dictSize);
}
/* clean up */
_cleanup:
free(srcBuffer);
free(sampleSizes);
free(dictBuffer);
return result;
}

39
dependencies/zstd-1.5.0/programs/dibio.h vendored Normal file
View file

@ -0,0 +1,39 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
/* This library is designed for a single-threaded console application.
* It exit() and printf() into stderr when it encounters an error condition. */
#ifndef DIBIO_H_003
#define DIBIO_H_003
/*-*************************************
* Dependencies
***************************************/
#define ZDICT_STATIC_LINKING_ONLY
#include "../lib/zdict.h" /* ZDICT_params_t */
/*-*************************************
* Public functions
***************************************/
/*! DiB_trainFromFiles() :
Train a dictionary from a set of files provided by `fileNamesTable`.
Resulting dictionary is written into file `dictFileName`.
`parameters` is optional and can be provided with values set to 0, meaning "default".
@return : 0 == ok. Any other : error.
*/
int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize,
const char** fileNamesTable, unsigned nbFiles, size_t chunkSize,
ZDICT_legacy_params_t* params, ZDICT_cover_params_t* coverParams,
ZDICT_fastCover_params_t* fastCoverParams, int optimize);
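/* Usage sketch for DiB_trainFromFiles() (illustrative only; assumes <string.h>) :
 * exactly one of the three parameter pointers should be non-NULL, and it selects the trainer.
 * Here the fastCover trainer is used with optimize==1 so it searches k/d itself;
 * chunkSize==0 means "one sample per file", and the ~110 KB target mirrors the usual
 * CLI default dictionary size.
 *
 *     ZDICT_fastCover_params_t fcParams;
 *     memset(&fcParams, 0, sizeof(fcParams));
 *     fcParams.zParams.notificationLevel = 2;
 *     {   int const err = DiB_trainFromFiles("dictionary", 110 * 1024,
 *                                            fileNamesTable, nbFiles, 0,
 *                                            NULL, NULL, &fcParams, 1);
 *         if (err) { ... }
 *     }
 */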
#endif

3123
dependencies/zstd-1.5.0/programs/fileio.c vendored Normal file

File diff suppressed because it is too large

179
dependencies/zstd-1.5.0/programs/fileio.h vendored Normal file
View file

@ -0,0 +1,179 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef FILEIO_H_23981798732
#define FILEIO_H_23981798732
#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_compressionParameters */
#include "../lib/zstd.h" /* ZSTD_* */
#if defined (__cplusplus)
extern "C" {
#endif
/* *************************************
* Special i/o constants
**************************************/
#define stdinmark "/*stdin*\\"
#define stdoutmark "/*stdout*\\"
#ifdef _WIN32
# define nulmark "NUL"
#else
# define nulmark "/dev/null"
#endif
/**
* We test whether the extension we found starts with 't', and if so, we append
* ".tar" to the end of the output name.
*/
#define LZMA_EXTENSION ".lzma"
#define XZ_EXTENSION ".xz"
#define TXZ_EXTENSION ".txz"
#define GZ_EXTENSION ".gz"
#define TGZ_EXTENSION ".tgz"
#define ZSTD_EXTENSION ".zst"
#define TZSTD_EXTENSION ".tzst"
#define ZSTD_ALT_EXTENSION ".zstd" /* allow decompression of .zstd files */
#define LZ4_EXTENSION ".lz4"
#define TLZ4_EXTENSION ".tlz4"
/*-*************************************
* Types
***************************************/
typedef enum { FIO_zstdCompression, FIO_gzipCompression, FIO_xzCompression, FIO_lzmaCompression, FIO_lz4Compression } FIO_compressionType_t;
typedef struct FIO_prefs_s FIO_prefs_t;
FIO_prefs_t* FIO_createPreferences(void);
void FIO_freePreferences(FIO_prefs_t* const prefs);
/* Mutable struct containing relevant context and state regarding (de)compression with respect to file I/O */
typedef struct FIO_ctx_s FIO_ctx_t;
FIO_ctx_t* FIO_createContext(void);
void FIO_freeContext(FIO_ctx_t* const fCtx);
typedef struct FIO_display_prefs_s FIO_display_prefs_t;
typedef enum { FIO_ps_auto, FIO_ps_never, FIO_ps_always } FIO_progressSetting_e;
/*-*************************************
* Parameters
***************************************/
/* FIO_prefs_t functions */
void FIO_setCompressionType(FIO_prefs_t* const prefs, FIO_compressionType_t compressionType);
void FIO_overwriteMode(FIO_prefs_t* const prefs);
void FIO_setAdaptiveMode(FIO_prefs_t* const prefs, unsigned adapt);
void FIO_setAdaptMin(FIO_prefs_t* const prefs, int minCLevel);
void FIO_setAdaptMax(FIO_prefs_t* const prefs, int maxCLevel);
void FIO_setUseRowMatchFinder(FIO_prefs_t* const prefs, int useRowMatchFinder);
void FIO_setBlockSize(FIO_prefs_t* const prefs, int blockSize);
void FIO_setChecksumFlag(FIO_prefs_t* const prefs, int checksumFlag);
void FIO_setDictIDFlag(FIO_prefs_t* const prefs, int dictIDFlag);
void FIO_setLdmBucketSizeLog(FIO_prefs_t* const prefs, int ldmBucketSizeLog);
void FIO_setLdmFlag(FIO_prefs_t* const prefs, unsigned ldmFlag);
void FIO_setLdmHashRateLog(FIO_prefs_t* const prefs, int ldmHashRateLog);
void FIO_setLdmHashLog(FIO_prefs_t* const prefs, int ldmHashLog);
void FIO_setLdmMinMatch(FIO_prefs_t* const prefs, int ldmMinMatch);
void FIO_setMemLimit(FIO_prefs_t* const prefs, unsigned memLimit);
void FIO_setNbWorkers(FIO_prefs_t* const prefs, int nbWorkers);
void FIO_setOverlapLog(FIO_prefs_t* const prefs, int overlapLog);
void FIO_setRemoveSrcFile(FIO_prefs_t* const prefs, unsigned flag);
void FIO_setSparseWrite(FIO_prefs_t* const prefs, unsigned sparse); /**< 0: no sparse; 1: disable on stdout; 2: always enabled */
void FIO_setRsyncable(FIO_prefs_t* const prefs, int rsyncable);
void FIO_setStreamSrcSize(FIO_prefs_t* const prefs, size_t streamSrcSize);
void FIO_setTargetCBlockSize(FIO_prefs_t* const prefs, size_t targetCBlockSize);
void FIO_setSrcSizeHint(FIO_prefs_t* const prefs, size_t srcSizeHint);
void FIO_setTestMode(FIO_prefs_t* const prefs, int testMode);
void FIO_setLiteralCompressionMode(
FIO_prefs_t* const prefs,
ZSTD_literalCompressionMode_e mode);
void FIO_setProgressSetting(FIO_progressSetting_e progressSetting);
void FIO_setNotificationLevel(int level);
void FIO_setExcludeCompressedFile(FIO_prefs_t* const prefs, int excludeCompressedFiles);
void FIO_setAllowBlockDevices(FIO_prefs_t* const prefs, int allowBlockDevices);
void FIO_setPatchFromMode(FIO_prefs_t* const prefs, int value);
void FIO_setContentSize(FIO_prefs_t* const prefs, int value);
/* FIO_ctx_t functions */
void FIO_setNbFilesTotal(FIO_ctx_t* const fCtx, int value);
void FIO_setHasStdoutOutput(FIO_ctx_t* const fCtx, int value);
void FIO_determineHasStdinInput(FIO_ctx_t* const fCtx, const FileNamesTable* const filenames);
/*-*************************************
* Single File functions
***************************************/
/** FIO_compressFilename() :
* @return : 0 == ok; 1 == pb with src file. */
int FIO_compressFilename (FIO_ctx_t* const fCtx, FIO_prefs_t* const prefs,
const char* outfilename, const char* infilename,
const char* dictFileName, int compressionLevel,
ZSTD_compressionParameters comprParams);
/** FIO_decompressFilename() :
* @return : 0 == ok; 1 == pb with src file. */
int FIO_decompressFilename (FIO_ctx_t* const fCtx, FIO_prefs_t* const prefs,
const char* outfilename, const char* infilename, const char* dictFileName);
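/* Usage sketch for the single-file entry points (illustrative only; assumes <string.h>) :
 * compress one file at level 3 with mostly-default preferences. A zero-initialized
 * ZSTD_compressionParameters struct leaves the advanced parameters at the defaults
 * of the chosen level, and a NULL dictFileName means "no dictionary".
 *
 *     FIO_prefs_t* const prefs = FIO_createPreferences();
 *     FIO_ctx_t* const fCtx = FIO_createContext();
 *     ZSTD_compressionParameters cParams;
 *     memset(&cParams, 0, sizeof(cParams));
 *     FIO_setNotificationLevel(2);
 *     {   int const err = FIO_compressFilename(fCtx, prefs, "file.txt.zst", "file.txt",
 *                                              NULL, 3, cParams);
 *         (void)err;
 *     }
 *     FIO_freePreferences(prefs);
 *     FIO_freeContext(fCtx);
 */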
int FIO_listMultipleFiles(unsigned numFiles, const char** filenameTable, int displayLevel);
/*-*************************************
* Multiple File functions
***************************************/
/** FIO_compressMultipleFilenames() :
* @return : nb of missing files */
int FIO_compressMultipleFilenames(FIO_ctx_t* const fCtx,
FIO_prefs_t* const prefs,
const char** inFileNamesTable,
const char* outMirroredDirName,
const char* outDirName,
const char* outFileName, const char* suffix,
const char* dictFileName, int compressionLevel,
ZSTD_compressionParameters comprParams);
/** FIO_decompressMultipleFilenames() :
* @return : nb of missing or skipped files */
int FIO_decompressMultipleFilenames(FIO_ctx_t* const fCtx,
FIO_prefs_t* const prefs,
const char** srcNamesTable,
const char* outMirroredDirName,
const char* outDirName,
const char* outFileName,
const char* dictFileName);
/* FIO_checkFilenameCollisions() :
* Checks for and warns if there are any files that would have the same output path
*/
int FIO_checkFilenameCollisions(const char** filenameTable, unsigned nbFiles);
/*-*************************************
* Advanced stuff (should actually be hosted elsewhere)
***************************************/
/* custom crash signal handler */
void FIO_addAbortHandler(void);
#if defined (__cplusplus)
}
#endif
#endif /* FILEIO_H_23981798732 */
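
A minimal usage sketch for the single-file functions above (illustrative only, not part of the bundled sources; it assumes the FIO_createPreferences()/FIO_freePreferences() pair declared earlier in fileio.h but not shown in this excerpt, and treats an all-zero ZSTD_compressionParameters as "no manual overrides"):

/* Illustrative sketch built on the declarations above. */
#include <string.h>     /* memset */
#include "fileio.h"     /* also provides ZSTD_compressionParameters for its own declarations */

int compress_one_file(const char* srcName, const char* dstName)
{
    FIO_ctx_t*   const fCtx  = FIO_createContext();
    FIO_prefs_t* const prefs = FIO_createPreferences();   /* assumed constructor, declared earlier in fileio.h */
    ZSTD_compressionParameters cParams;
    int result;

    memset(&cParams, 0, sizeof(cParams));  /* all-zero: assumed to mean "no manual parameter overrides" */
    FIO_setNbFilesTotal(fCtx, 1);
    FIO_setChecksumFlag(prefs, 1);         /* add a content checksum, like `zstd -C` */

    result = FIO_compressFilename(fCtx, prefs, dstName, srcName,
                                  NULL /* no dictionary */, 3 /* level */, cParams);

    FIO_freePreferences(prefs);            /* assumed destructor */
    FIO_freeContext(fCtx);
    return result;   /* 0 == ok, 1 == problem with the source file */
}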

View file

@ -0,0 +1,215 @@
/*
* Copyright (c) Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef PLATFORM_H_MODULE
#define PLATFORM_H_MODULE
#if defined (__cplusplus)
extern "C" {
#endif
/* **************************************
* Compiler Options
****************************************/
#if defined(_MSC_VER)
# define _CRT_SECURE_NO_WARNINGS /* Disable Visual Studio warning messages for fopen, strncpy, strerror */
# define _CRT_NONSTDC_NO_WARNINGS /* Disable C4996 complaining about posix function names */
# if (_MSC_VER <= 1800) /* 1800 == Visual Studio 2013 */
# define _CRT_SECURE_NO_DEPRECATE /* VS2005 - must be declared before <io.h> and <windows.h> */
# define snprintf sprintf_s /* snprintf unsupported by Visual <= 2013 */
# endif
# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
#endif
/* **************************************
* Detect 64-bit OS
* http://nadeausoftware.com/articles/2012/02/c_c_tip_how_detect_processor_type_using_compiler_predefined_macros
****************************************/
#if defined __ia64 || defined _M_IA64 /* Intel Itanium */ \
|| defined __powerpc64__ || defined __ppc64__ || defined __PPC64__ /* POWER 64-bit */ \
|| (defined __sparc && (defined __sparcv9 || defined __sparc_v9__ || defined __arch64__)) || defined __sparc64__ /* SPARC 64-bit */ \
  || defined __x86_64__ || defined _M_X64 /* x86 64-bit */ \
|| defined __arm64__ || defined __aarch64__ || defined __ARM64_ARCH_8__ /* ARM 64-bit */ \
|| (defined __mips && (__mips == 64 || __mips == 4 || __mips == 3)) /* MIPS 64-bit */ \
|| defined _LP64 || defined __LP64__ /* NetBSD, OpenBSD */ || defined __64BIT__ /* AIX */ || defined _ADDR64 /* Cray */ \
|| (defined __SIZEOF_POINTER__ && __SIZEOF_POINTER__ == 8) /* gcc */
# if !defined(__64BIT__)
# define __64BIT__ 1
# endif
#endif
/* *********************************************************
* Turn on Large Files support (>4GB) for 32-bit Linux/Unix
***********************************************************/
#if !defined(__64BIT__) || defined(__MINGW32__) /* No point defining Large file for 64 bit but MinGW-w64 requires it */
# if !defined(_FILE_OFFSET_BITS)
# define _FILE_OFFSET_BITS 64 /* turn off_t into a 64-bit type for ftello, fseeko */
# endif
# if !defined(_LARGEFILE_SOURCE) /* obsolete macro, replaced with _FILE_OFFSET_BITS */
# define _LARGEFILE_SOURCE 1 /* Large File Support extension (LFS) - fseeko, ftello */
# endif
# if defined(_AIX) || defined(__hpux)
# define _LARGE_FILES /* Large file support on 32-bits AIX and HP-UX */
# endif
#endif
/* ************************************************************
* Detect POSIX version
* PLATFORM_POSIX_VERSION = 0 for non-Unix e.g. Windows
* PLATFORM_POSIX_VERSION = 1 for Unix-like but non-POSIX
* PLATFORM_POSIX_VERSION > 1 is equal to found _POSIX_VERSION
* Value of PLATFORM_POSIX_VERSION can be forced on command line
***************************************************************/
#ifndef PLATFORM_POSIX_VERSION
# if (defined(__APPLE__) && defined(__MACH__)) || defined(__SVR4) || defined(_AIX) || defined(__hpux) /* POSIX.1-2001 (SUSv3) conformant */ \
|| defined(__DragonFly__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) /* BSD distros */
/* exception rule : force posix version to 200112L,
* note: it's better to use unistd.h's _POSIX_VERSION whenever possible */
# define PLATFORM_POSIX_VERSION 200112L
/* try to determine posix version through official unistd.h's _POSIX_VERSION (http://pubs.opengroup.org/onlinepubs/7908799/xsh/unistd.h.html).
* note : there is no simple way to know in advance if <unistd.h> is present or not on target system,
* Posix specification mandates its presence and its content, but target system must respect this spec.
* It's necessary to _not_ #include <unistd.h> whenever target OS is not unix-like
* otherwise it will block preprocessing stage.
* The following list of build macros tries to "guess" if target OS is likely unix-like, and therefore can #include <unistd.h>
*/
# elif !defined(_WIN32) \
&& ( defined(__unix__) || defined(__unix) \
|| defined(__midipix__) || defined(__VMS) || defined(__HAIKU__) )
# if defined(__linux__) || defined(__linux) || defined(__CYGWIN__)
# ifndef _POSIX_C_SOURCE
# define _POSIX_C_SOURCE 200809L /* feature test macro : https://www.gnu.org/software/libc/manual/html_node/Feature-Test-Macros.html */
# endif
# endif
# include <unistd.h> /* declares _POSIX_VERSION */
# if defined(_POSIX_VERSION) /* POSIX compliant */
# define PLATFORM_POSIX_VERSION _POSIX_VERSION
# else
# define PLATFORM_POSIX_VERSION 1
# endif
# ifdef __UCLIBC__
# ifndef __USE_MISC
# define __USE_MISC /* enable st_mtim on uclibc */
# endif
# endif
# else /* non-unix target platform (like Windows) */
# define PLATFORM_POSIX_VERSION 0
# endif
#endif /* PLATFORM_POSIX_VERSION */
#if PLATFORM_POSIX_VERSION > 1
/* glibc < 2.26 may not expose struct timespec def without this.
* See issue #1920. */
# ifndef _ATFILE_SOURCE
# define _ATFILE_SOURCE
# endif
#endif
/*-*********************************************
* Detect if isatty() and fileno() are available
************************************************/
#if (defined(__linux__) && (PLATFORM_POSIX_VERSION > 1)) \
|| (PLATFORM_POSIX_VERSION >= 200112L) \
|| defined(__DJGPP__)
# include <unistd.h> /* isatty */
# include <stdio.h> /* fileno */
# define IS_CONSOLE(stdStream) isatty(fileno(stdStream))
#elif defined(MSDOS) || defined(OS2)
# include <io.h> /* _isatty */
# define IS_CONSOLE(stdStream) _isatty(_fileno(stdStream))
#elif defined(WIN32) || defined(_WIN32)
# include <io.h> /* _isatty */
# include <windows.h> /* DeviceIoControl, HANDLE, FSCTL_SET_SPARSE */
# include <stdio.h> /* FILE */
static __inline int IS_CONSOLE(FILE* stdStream) {
DWORD dummy;
return _isatty(_fileno(stdStream)) && GetConsoleMode((HANDLE)_get_osfhandle(_fileno(stdStream)), &dummy);
}
#else
# define IS_CONSOLE(stdStream) 0
#endif
/******************************
* OS-specific IO behaviors
******************************/
#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(_WIN32)
# include <fcntl.h> /* _O_BINARY */
# include <io.h> /* _setmode, _fileno, _get_osfhandle */
# if !defined(__DJGPP__)
# include <windows.h> /* DeviceIoControl, HANDLE, FSCTL_SET_SPARSE */
# include <winioctl.h> /* FSCTL_SET_SPARSE */
# define SET_BINARY_MODE(file) { int const unused=_setmode(_fileno(file), _O_BINARY); (void)unused; }
# define SET_SPARSE_FILE_MODE(file) { DWORD dw; DeviceIoControl((HANDLE) _get_osfhandle(_fileno(file)), FSCTL_SET_SPARSE, 0, 0, 0, 0, &dw, 0); }
# else
# define SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY)
# define SET_SPARSE_FILE_MODE(file)
# endif
#else
# define SET_BINARY_MODE(file)
# define SET_SPARSE_FILE_MODE(file)
#endif
#ifndef ZSTD_SPARSE_DEFAULT
# if (defined(__APPLE__) && defined(__MACH__))
# define ZSTD_SPARSE_DEFAULT 0
# else
# define ZSTD_SPARSE_DEFAULT 1
# endif
#endif
#ifndef ZSTD_START_SYMBOLLIST_FRAME
# ifdef __linux__
# define ZSTD_START_SYMBOLLIST_FRAME 2
# elif defined __APPLE__
# define ZSTD_START_SYMBOLLIST_FRAME 4
# else
# define ZSTD_START_SYMBOLLIST_FRAME 0
# endif
#endif
#ifndef ZSTD_SETPRIORITY_SUPPORT
/* mandates presence of <sys/resource.h> and support for setpriority() : http://man7.org/linux/man-pages/man2/setpriority.2.html */
# define ZSTD_SETPRIORITY_SUPPORT (PLATFORM_POSIX_VERSION >= 200112L)
#endif
#ifndef ZSTD_NANOSLEEP_SUPPORT
/* mandates support of nanosleep() within <time.h> : http://man7.org/linux/man-pages/man2/nanosleep.2.html */
# if (defined(__linux__) && (PLATFORM_POSIX_VERSION >= 199309L)) \
|| (PLATFORM_POSIX_VERSION >= 200112L)
# define ZSTD_NANOSLEEP_SUPPORT 1
# else
# define ZSTD_NANOSLEEP_SUPPORT 0
# endif
#endif
#if defined (__cplusplus)
}
#endif
#endif /* PLATFORM_H_MODULE */
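
A brief illustrative sketch combining the SET_BINARY_MODE() and IS_CONSOLE() macros defined above (not part of the bundled sources):

#include <stdio.h>
#include "platform.h"

/* Prepare stdout for binary output: switch it to binary mode where needed
 * (Windows/DOS builds; a no-op elsewhere), and refuse to write raw
 * compressed bytes to an interactive terminal. */
int prepare_binary_stdout(void)
{
    SET_BINARY_MODE(stdout);
    if (IS_CONSOLE(stdout)) {
        fprintf(stderr, "refusing to write binary data to a terminal\n");
        return 1;
    }
    return 0;
}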

View file

@ -0,0 +1,169 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
/* === Dependencies === */
#include "timefn.h"
/*-****************************************
* Time functions
******************************************/
#if defined(_WIN32) /* Windows */
#include <stdlib.h> /* abort */
#include <stdio.h> /* perror */
UTIL_time_t UTIL_getTime(void) { UTIL_time_t x; QueryPerformanceCounter(&x); return x; }
PTime UTIL_getSpanTimeMicro(UTIL_time_t clockStart, UTIL_time_t clockEnd)
{
static LARGE_INTEGER ticksPerSecond;
static int init = 0;
if (!init) {
if (!QueryPerformanceFrequency(&ticksPerSecond)) {
perror("timefn::QueryPerformanceFrequency");
abort();
}
init = 1;
}
return 1000000ULL*(clockEnd.QuadPart - clockStart.QuadPart)/ticksPerSecond.QuadPart;
}
PTime UTIL_getSpanTimeNano(UTIL_time_t clockStart, UTIL_time_t clockEnd)
{
static LARGE_INTEGER ticksPerSecond;
static int init = 0;
if (!init) {
if (!QueryPerformanceFrequency(&ticksPerSecond)) {
perror("timefn::QueryPerformanceFrequency");
abort();
}
init = 1;
}
return 1000000000ULL*(clockEnd.QuadPart - clockStart.QuadPart)/ticksPerSecond.QuadPart;
}
#elif defined(__APPLE__) && defined(__MACH__)
UTIL_time_t UTIL_getTime(void) { return mach_absolute_time(); }
PTime UTIL_getSpanTimeMicro(UTIL_time_t clockStart, UTIL_time_t clockEnd)
{
static mach_timebase_info_data_t rate;
static int init = 0;
if (!init) {
mach_timebase_info(&rate);
init = 1;
}
return (((clockEnd - clockStart) * (PTime)rate.numer) / ((PTime)rate.denom))/1000ULL;
}
PTime UTIL_getSpanTimeNano(UTIL_time_t clockStart, UTIL_time_t clockEnd)
{
static mach_timebase_info_data_t rate;
static int init = 0;
if (!init) {
mach_timebase_info(&rate);
init = 1;
}
return ((clockEnd - clockStart) * (PTime)rate.numer) / ((PTime)rate.denom);
}
/* C11 requires timespec_get, but FreeBSD 11 lacks it, while still claiming C11 compliance.
Android also lacks it but does define TIME_UTC. */
#elif (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* C11 */) \
&& defined(TIME_UTC) && !defined(__ANDROID__)
#include <stdlib.h> /* abort */
#include <stdio.h> /* perror */
UTIL_time_t UTIL_getTime(void)
{
    /* time must be initialized, otherwise it may fail msan test.
* No good reason, likely a limitation of timespec_get() for some target */
UTIL_time_t time = UTIL_TIME_INITIALIZER;
if (timespec_get(&time, TIME_UTC) != TIME_UTC) {
perror("timefn::timespec_get");
abort();
}
return time;
}
static UTIL_time_t UTIL_getSpanTime(UTIL_time_t begin, UTIL_time_t end)
{
UTIL_time_t diff;
if (end.tv_nsec < begin.tv_nsec) {
diff.tv_sec = (end.tv_sec - 1) - begin.tv_sec;
diff.tv_nsec = (end.tv_nsec + 1000000000ULL) - begin.tv_nsec;
} else {
diff.tv_sec = end.tv_sec - begin.tv_sec;
diff.tv_nsec = end.tv_nsec - begin.tv_nsec;
}
return diff;
}
PTime UTIL_getSpanTimeMicro(UTIL_time_t begin, UTIL_time_t end)
{
UTIL_time_t const diff = UTIL_getSpanTime(begin, end);
PTime micro = 0;
micro += 1000000ULL * diff.tv_sec;
micro += diff.tv_nsec / 1000ULL;
return micro;
}
PTime UTIL_getSpanTimeNano(UTIL_time_t begin, UTIL_time_t end)
{
UTIL_time_t const diff = UTIL_getSpanTime(begin, end);
PTime nano = 0;
nano += 1000000000ULL * diff.tv_sec;
nano += diff.tv_nsec;
return nano;
}
#else /* relies on standard C90 (note : clock_t measurements can be wrong when using multi-threading) */
UTIL_time_t UTIL_getTime(void) { return clock(); }
PTime UTIL_getSpanTimeMicro(UTIL_time_t clockStart, UTIL_time_t clockEnd) { return 1000000ULL * (clockEnd - clockStart) / CLOCKS_PER_SEC; }
PTime UTIL_getSpanTimeNano(UTIL_time_t clockStart, UTIL_time_t clockEnd) { return 1000000000ULL * (clockEnd - clockStart) / CLOCKS_PER_SEC; }
#endif
/* returns time span in microseconds */
PTime UTIL_clockSpanMicro(UTIL_time_t clockStart )
{
UTIL_time_t const clockEnd = UTIL_getTime();
return UTIL_getSpanTimeMicro(clockStart, clockEnd);
}
/* returns time span in microseconds */
PTime UTIL_clockSpanNano(UTIL_time_t clockStart )
{
UTIL_time_t const clockEnd = UTIL_getTime();
return UTIL_getSpanTimeNano(clockStart, clockEnd);
}
void UTIL_waitForNextTick(void)
{
UTIL_time_t const clockStart = UTIL_getTime();
UTIL_time_t clockEnd;
do {
clockEnd = UTIL_getTime();
} while (UTIL_getSpanTimeNano(clockStart, clockEnd) == 0);
}

View file

@ -0,0 +1,89 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef TIME_FN_H_MODULE_287987
#define TIME_FN_H_MODULE_287987
#if defined (__cplusplus)
extern "C" {
#endif
/*-****************************************
* Dependencies
******************************************/
#include <time.h> /* clock_t, clock, CLOCKS_PER_SEC */
/*-****************************************
* Local Types
******************************************/
#if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
# if defined(_AIX)
# include <inttypes.h>
# else
# include <stdint.h> /* intptr_t */
# endif
typedef uint64_t PTime; /* Precise Time */
#else
typedef unsigned long long PTime; /* does not support compilers without long long support */
#endif
/*-****************************************
* Time functions
******************************************/
#if defined(_WIN32) /* Windows */
#include <windows.h> /* LARGE_INTEGER */
typedef LARGE_INTEGER UTIL_time_t;
#define UTIL_TIME_INITIALIZER { { 0, 0 } }
#elif defined(__APPLE__) && defined(__MACH__)
#include <mach/mach_time.h>
typedef PTime UTIL_time_t;
#define UTIL_TIME_INITIALIZER 0
/* C11 requires timespec_get, but FreeBSD 11 lacks it, while still claiming C11 compliance.
Android also lacks it but does define TIME_UTC. */
#elif (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* C11 */) \
&& defined(TIME_UTC) && !defined(__ANDROID__)
typedef struct timespec UTIL_time_t;
#define UTIL_TIME_INITIALIZER { 0, 0 }
#else /* relies on standard C90 (note : clock_t measurements can be wrong when using multi-threading) */
typedef clock_t UTIL_time_t;
#define UTIL_TIME_INITIALIZER 0
#endif
UTIL_time_t UTIL_getTime(void);
PTime UTIL_getSpanTimeMicro(UTIL_time_t clockStart, UTIL_time_t clockEnd);
PTime UTIL_getSpanTimeNano(UTIL_time_t clockStart, UTIL_time_t clockEnd);
#define SEC_TO_MICRO ((PTime)1000000)
PTime UTIL_clockSpanMicro(UTIL_time_t clockStart);
PTime UTIL_clockSpanNano(UTIL_time_t clockStart);
void UTIL_waitForNextTick(void);
#if defined (__cplusplus)
}
#endif
#endif /* TIME_FN_H_MODULE_287987 */
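
A short illustrative sketch of the timing API declared above (not part of the bundled sources):

#include <stdio.h>
#include "timefn.h"

/* Measure how long a piece of work takes, in microseconds. */
void report_duration(void (*work)(void))
{
    UTIL_time_t const start = UTIL_getTime();
    work();
    {   PTime const elapsed = UTIL_clockSpanMicro(start);
        printf("work took %llu microseconds\n", (unsigned long long)elapsed);
    }
}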

1291
dependencies/zstd-1.5.0/programs/util.c vendored Normal file

File diff suppressed because it is too large

290
dependencies/zstd-1.5.0/programs/util.h vendored Normal file
View file

@ -0,0 +1,290 @@
/*
* Copyright (c) Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef UTIL_H_MODULE
#define UTIL_H_MODULE
#if defined (__cplusplus)
extern "C" {
#endif
/*-****************************************
* Dependencies
******************************************/
#include "platform.h" /* PLATFORM_POSIX_VERSION, ZSTD_NANOSLEEP_SUPPORT, ZSTD_SETPRIORITY_SUPPORT */
#include <stddef.h> /* size_t, ptrdiff_t */
#include <sys/types.h> /* stat, utime */
#include <sys/stat.h> /* stat, chmod */
#include "../lib/common/mem.h" /* U64 */
/*-************************************************************
* Avoid fseek()'s 2GiB barrier with MSVC, macOS, *BSD, MinGW
***************************************************************/
#if defined(_MSC_VER) && (_MSC_VER >= 1400)
# define UTIL_fseek _fseeki64
#elif !defined(__64BIT__) && (PLATFORM_POSIX_VERSION >= 200112L) /* No point defining Large file for 64 bit */
# define UTIL_fseek fseeko
#elif defined(__MINGW32__) && defined(__MSVCRT__) && !defined(__STRICT_ANSI__) && !defined(__NO_MINGW_LFS)
# define UTIL_fseek fseeko64
#else
# define UTIL_fseek fseek
#endif
/*-*************************************************
* Sleep & priority functions: Windows - Posix - others
***************************************************/
#if defined(_WIN32)
# include <windows.h>
# define SET_REALTIME_PRIORITY SetPriorityClass(GetCurrentProcess(), REALTIME_PRIORITY_CLASS)
# define UTIL_sleep(s) Sleep(1000*s)
# define UTIL_sleepMilli(milli) Sleep(milli)
#elif PLATFORM_POSIX_VERSION > 0 /* Unix-like operating system */
# include <unistd.h> /* sleep */
# define UTIL_sleep(s) sleep(s)
# if ZSTD_NANOSLEEP_SUPPORT /* necessarily defined in platform.h */
# define UTIL_sleepMilli(milli) { struct timespec t; t.tv_sec=0; t.tv_nsec=milli*1000000ULL; nanosleep(&t, NULL); }
# else
# define UTIL_sleepMilli(milli) /* disabled */
# endif
# if ZSTD_SETPRIORITY_SUPPORT
# include <sys/resource.h> /* setpriority */
# define SET_REALTIME_PRIORITY setpriority(PRIO_PROCESS, 0, -20)
# else
# define SET_REALTIME_PRIORITY /* disabled */
# endif
#else  /* unknown non-unix operating system */
# define UTIL_sleep(s) /* disabled */
# define UTIL_sleepMilli(milli) /* disabled */
# define SET_REALTIME_PRIORITY /* disabled */
#endif
/*-****************************************
* Compiler specifics
******************************************/
#if defined(__INTEL_COMPILER)
# pragma warning(disable : 177) /* disable: message #177: function was declared but never referenced, useful with UTIL_STATIC */
#endif
#if defined(__GNUC__)
# define UTIL_STATIC static __attribute__((unused))
#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
# define UTIL_STATIC static inline
#elif defined(_MSC_VER)
# define UTIL_STATIC static __inline
#else
# define UTIL_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
#endif
/*-****************************************
* Console log
******************************************/
extern int g_utilDisplayLevel;
/**
* Displays a message prompt and returns success (0) if first character from stdin
* matches any from acceptableLetters. Otherwise, returns failure (1) and displays abortMsg.
* If any of the inputs are stdin itself, then automatically return failure (1).
*/
int UTIL_requireUserConfirmation(const char* prompt, const char* abortMsg, const char* acceptableLetters, int hasStdinInput);
/*-****************************************
* File functions
******************************************/
#if defined(_MSC_VER)
typedef struct __stat64 stat_t;
typedef int mode_t;
#elif defined(__MINGW32__) && defined (__MSVCRT__)
typedef struct _stati64 stat_t;
#else
typedef struct stat stat_t;
#endif
#if defined(_MSC_VER) || defined(__MINGW32__) || defined (__MSVCRT__) /* windows support */
#define PATH_SEP '\\'
#define STRDUP(s) _strdup(s)
#else
#define PATH_SEP '/'
#include <libgen.h>
#define STRDUP(s) strdup(s)
#endif
/**
* Calls platform's equivalent of stat() on filename and writes info to statbuf.
* Returns success (1) or failure (0).
*/
int UTIL_stat(const char* filename, stat_t* statbuf);
/**
* Instead of getting a file's stats, this updates them with the info in the
* provided stat_t. Currently sets owner, group, atime, and mtime. Will only
* update this info for regular files.
*/
int UTIL_setFileStat(const char* filename, const stat_t* statbuf);
/*
* These helpers operate on a pre-populated stat_t, i.e., the result of
* calling one of the above functions.
*/
int UTIL_isRegularFileStat(const stat_t* statbuf);
int UTIL_isDirectoryStat(const stat_t* statbuf);
int UTIL_isFIFOStat(const stat_t* statbuf);
int UTIL_isBlockDevStat(const stat_t* statbuf);
U64 UTIL_getFileSizeStat(const stat_t* statbuf);
/**
* Like chmod(), but only modifies regular files. Provided statbuf may be NULL,
* in which case this function will stat() the file internally, in order to
* check whether it should be modified.
*/
int UTIL_chmod(char const* filename, const stat_t* statbuf, mode_t permissions);
/*
* In the absence of a pre-existing stat result on the file in question, these
* functions will do a stat() call internally and then use that result to
* compute the needed information.
*/
int UTIL_isRegularFile(const char* infilename);
int UTIL_isDirectory(const char* infilename);
int UTIL_isSameFile(const char* file1, const char* file2);
int UTIL_isCompressedFile(const char* infilename, const char *extensionList[]);
int UTIL_isLink(const char* infilename);
int UTIL_isFIFO(const char* infilename);
#define UTIL_FILESIZE_UNKNOWN ((U64)(-1))
U64 UTIL_getFileSize(const char* infilename);
U64 UTIL_getTotalFileSize(const char* const * fileNamesTable, unsigned nbFiles);
int UTIL_compareStr(const void *p1, const void *p2);
const char* UTIL_getFileExtension(const char* infilename);
void UTIL_mirrorSourceFilesDirectories(const char** fileNamesTable, unsigned int nbFiles, const char *outDirName);
char* UTIL_createMirroredDestDirName(const char* srcFileName, const char* outDirRootName);
/*-****************************************
* Lists of Filenames
******************************************/
typedef struct
{ const char** fileNames;
char* buf; /* fileNames are stored in this buffer (or are read-only) */
size_t tableSize; /* nb of fileNames */
size_t tableCapacity;
} FileNamesTable;
/*! UTIL_createFileNamesTable_fromFileName() :
* read filenames from @inputFileName, and store them into returned object.
* @return : a FileNamesTable*, or NULL in case of error (ex: @inputFileName doesn't exist).
* Note: inputFileSize must be less than 50MB
*/
FileNamesTable*
UTIL_createFileNamesTable_fromFileName(const char* inputFileName);
/*! UTIL_assembleFileNamesTable() :
* This function takes ownership of its arguments, @filenames and @buf,
 * and stores them inside the created object.
* note : this function never fails,
* it will rather exit() the program if internal allocation fails.
* @return : resulting FileNamesTable* object.
*/
FileNamesTable*
UTIL_assembleFileNamesTable(const char** filenames, size_t tableSize, char* buf);
/*! UTIL_freeFileNamesTable() :
* This function is compatible with NULL argument and never fails.
*/
void UTIL_freeFileNamesTable(FileNamesTable* table);
/*! UTIL_mergeFileNamesTable():
* @return : FileNamesTable*, concatenation of @table1 and @table2
* note: @table1 and @table2 are consumed (freed) by this operation
*/
FileNamesTable*
UTIL_mergeFileNamesTable(FileNamesTable* table1, FileNamesTable* table2);
/*! UTIL_expandFNT() :
 * read names from @fnt, and expand those corresponding to directories,
 * updating @fnt so that it contains only file names.
 * Note : the function returns no value; in case of error, @fnt[0] is set to NULL.
*/
void UTIL_expandFNT(FileNamesTable** fnt, int followLinks);
/*! UTIL_createFNT_fromROTable() :
* copy the @filenames pointer table inside the returned object.
* The names themselves are still stored in their original buffer, which must outlive the object.
* @return : a FileNamesTable* object,
* or NULL in case of error
*/
FileNamesTable*
UTIL_createFNT_fromROTable(const char** filenames, size_t nbFilenames);
/*! UTIL_allocateFileNamesTable() :
* Allocates a table of const char*, to insert read-only names later on.
* The created FileNamesTable* doesn't hold a buffer.
* @return : FileNamesTable*, or NULL, if allocation fails.
*/
FileNamesTable* UTIL_allocateFileNamesTable(size_t tableSize);
/*! UTIL_refFilename() :
 * Add a reference to a read-only name into the @fnt table.
 * As @filename is only referenced, its lifetime must outlive @fnt.
 * The internal table must be large enough to reference a new member,
 * otherwise it's UB (protected by an `assert()`).
*/
void UTIL_refFilename(FileNamesTable* fnt, const char* filename);
/* UTIL_createExpandedFNT() is only active if UTIL_HAS_CREATEFILELIST is defined.
* Otherwise, UTIL_createExpandedFNT() is a shell function which does nothing
* apart from displaying a warning message.
*/
#ifdef _WIN32
# define UTIL_HAS_CREATEFILELIST
#elif defined(__linux__) || (PLATFORM_POSIX_VERSION >= 200112L) /* opendir, readdir require POSIX.1-2001 */
# define UTIL_HAS_CREATEFILELIST
# define UTIL_HAS_MIRRORFILELIST
#else
/* do not define UTIL_HAS_CREATEFILELIST */
#endif
/*! UTIL_createExpandedFNT() :
* read names from @filenames, and expand those corresponding to directories.
* links are followed or not depending on @followLinks directive.
* @return : an expanded FileNamesTable*, where each name is a file
* or NULL in case of error
*/
FileNamesTable*
UTIL_createExpandedFNT(const char* const* filenames, size_t nbFilenames, int followLinks);
/*-****************************************
* System
******************************************/
int UTIL_countPhysicalCores(void);
#if defined (__cplusplus)
}
#endif
#endif /* UTIL_H_MODULE */
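
An illustrative sketch of the FileNamesTable helpers declared above (only functions from this header are used; not part of the bundled sources):

#include <stdio.h>
#include "util.h"

/* Take a read-only argv-style list, expand directories into the files they
 * contain, then print each file name with its size. */
void print_expanded_file_list(const char** names, size_t nbNames, int followLinks)
{
    FileNamesTable* fnt = UTIL_createFNT_fromROTable(names, nbNames);
    if (fnt == NULL) return;               /* allocation error */
    UTIL_expandFNT(&fnt, followLinks);     /* directory entries replaced by their contents */
    if (fnt == NULL) return;               /* expansion error (see note above) */
    {   size_t n;
        for (n = 0; n < fnt->tableSize; n++) {
            U64 const size = UTIL_getFileSize(fnt->fileNames[n]);  /* may be UTIL_FILESIZE_UNKNOWN */
            printf("%s : %llu bytes\n", fnt->fileNames[n], (unsigned long long)size);
        }
    }
    UTIL_freeFileNamesTable(fnt);
}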

View file

@ -0,0 +1,17 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
/* minimal set of defines required to generate zstd.res from zstd.rc */
#define VS_VERSION_INFO 1
#define VS_FFI_FILEFLAGSMASK 0x0000003FL
#define VOS_NT_WINDOWS32 0x00040004L
#define VFT_DLL 0x00000002L
#define VFT2_UNKNOWN 0x00000000L

View file

@ -0,0 +1,51 @@
// Microsoft Visual C++ generated resource script.
//
#include "zstd.h" /* ZSTD_VERSION_STRING */
#define APSTUDIO_READONLY_SYMBOLS
#include "verrsrc.h"
#undef APSTUDIO_READONLY_SYMBOLS
#if !defined(AFX_RESOURCE_DLL) || defined(AFX_TARG_ENU)
LANGUAGE 9, 1
/////////////////////////////////////////////////////////////////////////////
//
// Version
//
VS_VERSION_INFO VERSIONINFO
FILEVERSION ZSTD_VERSION_MAJOR,ZSTD_VERSION_MINOR,ZSTD_VERSION_RELEASE,0
PRODUCTVERSION ZSTD_VERSION_MAJOR,ZSTD_VERSION_MINOR,ZSTD_VERSION_RELEASE,0
FILEFLAGSMASK VS_FFI_FILEFLAGSMASK
#ifdef _DEBUG
FILEFLAGS VS_FF_DEBUG
#else
FILEFLAGS 0x0L
#endif
FILEOS VOS_NT_WINDOWS32
FILETYPE VFT_DLL
FILESUBTYPE VFT2_UNKNOWN
BEGIN
BLOCK "StringFileInfo"
BEGIN
BLOCK "040904B0"
BEGIN
VALUE "CompanyName", "Yann Collet, Facebook, Inc."
VALUE "FileDescription", "Zstandard - Fast and efficient compression algorithm"
VALUE "FileVersion", ZSTD_VERSION_STRING
VALUE "InternalName", "zstd.exe"
VALUE "LegalCopyright", "Copyright (c) 2013-present, Yann Collet, Facebook, Inc."
VALUE "OriginalFilename", "zstd.exe"
VALUE "ProductName", "Zstandard"
VALUE "ProductVersion", ZSTD_VERSION_STRING
END
END
BLOCK "VarFileInfo"
BEGIN
VALUE "Translation", 0x0409, 1200
END
END
#endif

Binary file not shown.

Binary file not shown.

491
dependencies/zstd-1.5.0/programs/zstd.1 vendored Normal file
View file

@ -0,0 +1,491 @@
.
.TH "ZSTD" "1" "May 2021" "zstd 1.5.0" "User Commands"
.
.SH "NAME"
\fBzstd\fR \- zstd, zstdmt, unzstd, zstdcat \- Compress or decompress \.zst files
.
.SH "SYNOPSIS"
\fBzstd\fR [\fIOPTIONS\fR] [\-|\fIINPUT\-FILE\fR] [\-o \fIOUTPUT\-FILE\fR]
.
.P
\fBzstdmt\fR is equivalent to \fBzstd \-T0\fR
.
.P
\fBunzstd\fR is equivalent to \fBzstd \-d\fR
.
.P
\fBzstdcat\fR is equivalent to \fBzstd \-dcf\fR
.
.SH "DESCRIPTION"
\fBzstd\fR is a fast lossless compression algorithm and data compression tool, with command line syntax similar to \fBgzip (1)\fR and \fBxz (1)\fR\. It is based on the \fBLZ77\fR family, with further FSE & huff0 entropy stages\. \fBzstd\fR offers highly configurable compression speed, with fast modes at > 200 MB/s per core, and strong modes nearing lzma compression ratios\. It also features a very fast decoder, with speeds > 500 MB/s per core\.
.
.P
\fBzstd\fR command line syntax is generally similar to gzip, but features the following differences :
.
.IP "\(bu" 4
Source files are preserved by default\. It\'s possible to remove them automatically by using the \fB\-\-rm\fR command\.
.
.IP "\(bu" 4
When compressing a single file, \fBzstd\fR displays progress notifications and result summary by default\. Use \fB\-q\fR to turn them off\.
.
.IP "\(bu" 4
\fBzstd\fR does not accept input from console, but it properly accepts \fBstdin\fR when it\'s not the console\.
.
.IP "\(bu" 4
\fBzstd\fR displays a short help page when the command line is invalid\. Use \fB\-q\fR to turn it off\.
.
.IP "" 0
.
.P
\fBzstd\fR compresses or decompresses each \fIfile\fR according to the selected operation mode\. If no \fIfiles\fR are given or \fIfile\fR is \fB\-\fR, \fBzstd\fR reads from standard input and writes the processed data to standard output\. \fBzstd\fR will refuse to write compressed data to standard output if it is a terminal : it will display an error message and skip the \fIfile\fR\. Similarly, \fBzstd\fR will refuse to read compressed data from standard input if it is a terminal\.
.
.P
Unless \fB\-\-stdout\fR or \fB\-o\fR is specified, \fIfiles\fR are written to a new file whose name is derived from the source \fIfile\fR name:
.
.IP "\(bu" 4
When compressing, the suffix \fB\.zst\fR is appended to the source filename to get the target filename\.
.
.IP "\(bu" 4
When decompressing, the \fB\.zst\fR suffix is removed from the source filename to get the target filename
.
.IP "" 0
.
.SS "Concatenation with \.zst files"
It is possible to concatenate \fB\.zst\fR files as is\. \fBzstd\fR will decompress such files as if they were a single \fB\.zst\fR file\.
.
.SH "OPTIONS"
.
.SS "Integer suffixes and special values"
In most places where an integer argument is expected, an optional suffix is supported to easily indicate large integers\. There must be no space between the integer and the suffix\.
.
.TP
\fBKiB\fR
Multiply the integer by 1,024 (2^10)\. \fBKi\fR, \fBK\fR, and \fBKB\fR are accepted as synonyms for \fBKiB\fR\.
.
.TP
\fBMiB\fR
Multiply the integer by 1,048,576 (2^20)\. \fBMi\fR, \fBM\fR, and \fBMB\fR are accepted as synonyms for \fBMiB\fR\.
.
.SS "Operation mode"
If multiple operation mode options are given, the last one takes effect\.
.
.TP
\fB\-z\fR, \fB\-\-compress\fR
Compress\. This is the default operation mode when no operation mode option is specified and no other operation mode is implied from the command name (for example, \fBunzstd\fR implies \fB\-\-decompress\fR)\.
.
.TP
\fB\-d\fR, \fB\-\-decompress\fR, \fB\-\-uncompress\fR
Decompress\.
.
.TP
\fB\-t\fR, \fB\-\-test\fR
Test the integrity of compressed \fIfiles\fR\. This option is equivalent to \fB\-\-decompress \-\-stdout\fR except that the decompressed data is discarded instead of being written to standard output\. No files are created or removed\.
.
.TP
\fB\-b#\fR
Benchmark file(s) using compression level #
.
.TP
\fB\-\-train FILEs\fR
Use FILEs as a training set to create a dictionary\. The training set should contain a lot of small files (> 100)\.
.
.TP
\fB\-l\fR, \fB\-\-list\fR
Display information related to a zstd compressed file, such as size, ratio, and checksum\. Some of these fields may not be available\. This command can be augmented with the \fB\-v\fR modifier\.
.
.SS "Operation modifiers"
.
.IP "\(bu" 4
\fB\-#\fR: \fB#\fR compression level [1\-19] (default: 3)
.
.IP "\(bu" 4
\fB\-\-ultra\fR: unlocks high compression levels 20+ (maximum 22), using a lot more memory\. Note that decompression will also require more memory when using these levels\.
.
.IP "\(bu" 4
\fB\-\-fast[=#]\fR: switch to ultra\-fast compression levels\. If \fB=#\fR is not present, it defaults to \fB1\fR\. The higher the value, the faster the compression speed, at the cost of some compression ratio\. This setting overwrites compression level if one was set previously\. Similarly, if a compression level is set after \fB\-\-fast\fR, it overrides it\.
.
.IP "\(bu" 4
\fB\-T#\fR, \fB\-\-threads=#\fR: Compress using \fB#\fR working threads (default: 1)\. If \fB#\fR is 0, attempt to detect and use the number of physical CPU cores\. In all cases, the nb of threads is capped to \fBZSTDMT_NBWORKERS_MAX\fR, which is either 64 in 32\-bit mode, or 256 for 64\-bit environments\. This modifier does nothing if \fBzstd\fR is compiled without multithread support\.
.
.IP "\(bu" 4
\fB\-\-single\-thread\fR: Does not spawn a thread for compression, use a single thread for both I/O and compression\. In this mode, compression is serialized with I/O, which is slightly slower\. (This is different from \fB\-T1\fR, which spawns 1 compression thread in parallel of I/O)\. This mode is the only one available when multithread support is disabled\. Single\-thread mode features lower memory usage\. Final compressed result is slightly different from \fB\-T1\fR\.
.
.IP "\(bu" 4
\fB\-\-adapt[=min=#,max=#]\fR : \fBzstd\fR will dynamically adapt compression level to perceived I/O conditions\. Compression level adaptation can be observed live by using command \fB\-v\fR\. Adaptation can be constrained between supplied \fBmin\fR and \fBmax\fR levels\. The feature works when combined with multi\-threading and \fB\-\-long\fR mode\. It does not work with \fB\-\-single\-thread\fR\. It sets window size to 8 MB by default (can be changed manually, see \fBwlog\fR)\. Due to the chaotic nature of dynamic adaptation, compressed result is not reproducible\. \fInote\fR : at the time of this writing, \fB\-\-adapt\fR can remain stuck at low speed when combined with multiple worker threads (>=2)\.
.
.IP "\(bu" 4
\fB\-\-long[=#]\fR: enables long distance matching with \fB#\fR \fBwindowLog\fR; if \fB#\fR is not present, it defaults to \fB27\fR\. This increases the window size (\fBwindowLog\fR) and memory usage for both the compressor and decompressor\. This setting is designed to improve the compression ratio for files with long matches at a large distance\.
.
.IP
Note: If \fBwindowLog\fR is set to larger than 27, \fB\-\-long=windowLog\fR or \fB\-\-memory=windowSize\fR needs to be passed to the decompressor\.
.
.IP "\(bu" 4
\fB\-D DICT\fR: use \fBDICT\fR as Dictionary to compress or decompress FILE(s)
.
.IP "\(bu" 4
\fB\-\-patch\-from FILE\fR: Specify the file to be used as a reference point for zstd\'s diff engine\. This is effectively dictionary compression with some convenient parameter selection, namely that windowSize > srcSize\.
.
.IP
Note: cannot use both this and \-D together\. Note: \fB\-\-long\fR mode will be automatically activated if chainLog < fileLog (fileLog being the windowLog required to cover the whole file)\. You can also manually force it\. Note: for all levels, you can use \-\-patch\-from in \-\-single\-thread mode to improve compression ratio at the cost of speed\. Note: for level 19, you can get increased compression ratio at the cost of speed by specifying \fB\-\-zstd=targetLength=\fR to be something large (e\.g\. 4096), and by setting a large \fB\-\-zstd=chainLog=\fR\.
.
.IP "\(bu" 4
\fB\-\-rsyncable\fR : \fBzstd\fR will periodically synchronize the compression state to make the compressed file more rsync\-friendly\. There is a negligible impact to compression ratio, and the faster compression levels will see a small compression speed hit\. This feature does not work with \fB\-\-single\-thread\fR\. You probably don\'t want to use it with long range mode, since it will decrease the effectiveness of the synchronization points, but your mileage may vary\.
.
.IP "\(bu" 4
\fB\-C\fR, \fB\-\-[no\-]check\fR: add integrity check computed from uncompressed data (default: enabled)
.
.IP "\(bu" 4
\fB\-\-[no\-]content\-size\fR: enable / disable storing the original size of the file in the header of the compressed file\. The default option is \-\-content\-size (meaning that the original size will be placed in the header)\.
.
.IP "\(bu" 4
\fB\-\-no\-dictID\fR: do not store dictionary ID within frame header (dictionary compression)\. The decoder will have to rely on implicit knowledge about which dictionary to use, it won\'t be able to check if it\'s correct\.
.
.IP "\(bu" 4
\fB\-M#\fR, \fB\-\-memory=#\fR: Set a memory usage limit\. By default, Zstandard uses 128 MB for decompression as the maximum amount of memory the decompressor is allowed to use, but you can override this manually if need be in either direction (i\.e\. you can increase or decrease it)\.
.
.IP
This is also used during compression when using \-\-patch\-from=\. In this case, this parameter overrides the maximum size allowed for a dictionary (128 MB)\.
.
.IP "\(bu" 4
\fB\-\-stream\-size=#\fR : Sets the pledged source size of input coming from a stream\. This value must be exact, as it will be included in the produced frame header\. Incorrect stream sizes will cause an error\. This information will be used to better optimize compression parameters, resulting in better and potentially faster compression, especially for smaller source sizes\.
.
.IP "\(bu" 4
\fB\-\-size\-hint=#\fR: When handling input from a stream, \fBzstd\fR must guess how large the source size will be when optimizing compression parameters\. If the stream size is relatively small, this guess may be a poor one, resulting in a higher compression ratio than expected\. This feature allows for controlling the guess when needed\. Exact guesses result in better compression ratios\. Overestimates result in slightly degraded compression ratios, while underestimates may result in significant degradation\.
.
.IP "\(bu" 4
\fB\-o FILE\fR: save result into \fBFILE\fR
.
.IP "\(bu" 4
\fB\-f\fR, \fB\-\-force\fR: disable input and output checks\. Allows overwriting existing files, input from console, output to stdout, operating on links, block devices, etc\.
.
.IP "\(bu" 4
\fB\-c\fR, \fB\-\-stdout\fR: force write to standard output, even if it is the console
.
.IP "\(bu" 4
\fB\-\-[no\-]sparse\fR: enable / disable sparse FS support, to make files with many zeroes smaller on disk\. Creating sparse files may save disk space and speed up decompression by reducing the amount of disk I/O\. default: enabled when output is into a file, and disabled when output is stdout\. This setting overrides default and can force sparse mode over stdout\.
.
.IP "\(bu" 4
\fB\-\-rm\fR: remove source file(s) after successful compression or decompression\. If used in combination with \-o, will trigger a confirmation prompt (which can be silenced with \-f), as this is a destructive operation\.
.
.IP "\(bu" 4
\fB\-k\fR, \fB\-\-keep\fR: keep source file(s) after successful compression or decompression\. This is the default behavior\.
.
.IP "\(bu" 4
\fB\-r\fR: operate recursively on directories
.
.IP "\(bu" 4
\fB\-\-filelist FILE\fR: read a list of files to process as content from \fBFILE\fR\. Format is compatible with \fBls\fR output, with one file per line\.
.
.IP "\(bu" 4
\fB\-\-output\-dir\-flat DIR\fR: resulting files are stored into target \fBDIR\fR directory, instead of same directory as origin file\. Be aware that this command can introduce name collision issues, if multiple files, from different directories, end up having the same name\. Collision resolution ensures first file with a given name will be present in \fBDIR\fR, while in combination with \fB\-f\fR, the last file will be present instead\.
.
.IP "\(bu" 4
\fB\-\-output\-dir\-mirror DIR\fR: similar to \fB\-\-output\-dir\-flat\fR, the output files are stored underneath target \fBDIR\fR directory, but this option will replicate input directory hierarchy into output \fBDIR\fR\.
.
.IP
If input directory contains "\.\.", the files in this directory will be ignored\. If input directory is an absolute directory (i\.e\. "/var/tmp/abc"), it will be stored into the "output\-dir/var/tmp/abc"\. If there are multiple input files or directories, name collision resolution will follow the same rules as \fB\-\-output\-dir\-flat\fR\.
.
.IP "\(bu" 4
\fB\-\-format=FORMAT\fR: compress and decompress in other formats\. If compiled with support, zstd can compress to or decompress from other compression algorithm formats\. Possibly available options are \fBzstd\fR, \fBgzip\fR, \fBxz\fR, \fBlzma\fR, and \fBlz4\fR\. If no such format is provided, \fBzstd\fR is the default\.
.
.IP "\(bu" 4
\fB\-h\fR/\fB\-H\fR, \fB\-\-help\fR: display help/long help and exit
.
.IP "\(bu" 4
\fB\-V\fR, \fB\-\-version\fR: display version number and exit\. Advanced : \fB\-vV\fR also displays supported formats\. \fB\-vvV\fR also displays POSIX support\. \fB\-q\fR will only display the version number, suitable for machine reading\.
.
.IP "\(bu" 4
\fB\-v\fR, \fB\-\-verbose\fR: verbose mode, display more information
.
.IP "\(bu" 4
\fB\-q\fR, \fB\-\-quiet\fR: suppress warnings, interactivity, and notifications\. specify twice to suppress errors too\.
.
.IP "\(bu" 4
\fB\-\-no\-progress\fR: do not display the progress bar, but keep all other messages\.
.
.IP "\(bu" 4
\fB\-\-show\-default\-cparams\fR: Shows the default compression parameters that will be used for a particular src file\. If the provided src file is not a regular file (e\.g\. a named pipe), the cli will just output the default parameters\. That is, the parameters that are used when the src size is unknown\.
.
.IP "\(bu" 4
\fB\-\-\fR: All arguments after \fB\-\-\fR are treated as files
.
.IP "" 0
.
.SS "Restricted usage of Environment Variables"
Using environment variables to set parameters has security implications\. Therefore, this avenue is intentionally restricted\. Only \fBZSTD_CLEVEL\fR and \fBZSTD_NBTHREADS\fR are currently supported\. They set the compression level and number of threads to use during compression, respectively\.
.
.P
\fBZSTD_CLEVEL\fR can be used to set the level between 1 and 19 (the "normal" range)\. If the value of \fBZSTD_CLEVEL\fR is not a valid integer, it will be ignored with a warning message\. \fBZSTD_CLEVEL\fR just replaces the default compression level (\fB3\fR)\.
.
.P
\fBZSTD_NBTHREADS\fR can be used to set the number of threads \fBzstd\fR will attempt to use during compression\. If the value of \fBZSTD_NBTHREADS\fR is not a valid unsigned integer, it will be ignored with a warning message\. \fBZSTD_NBTHREADS\fR has a default value of (\fB1\fR), and is capped at \fBZSTDMT_NBWORKERS_MAX\fR (64 in 32\-bit mode, 256 in 64\-bit environments)\. \fBzstd\fR must be compiled with multithread support for this to have any effect\.
.
.P
They can both be overridden by corresponding command line arguments: \fB\-#\fR for compression level and \fB\-T#\fR for number of compression threads\.
.
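.P
For example, \fBZSTD_CLEVEL=19 ZSTD_NBTHREADS=4 zstd FILE\fR compresses \fBFILE\fR at level 19 using 4 threads, while \fBZSTD_CLEVEL=19 zstd \-7 FILE\fR still compresses at level 7, since the command line argument takes precedence\.
.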
.SH "DICTIONARY BUILDER"
\fBzstd\fR offers \fIdictionary\fR compression, which greatly improves efficiency on small files and messages\. It\'s possible to train \fBzstd\fR with a set of samples, the result of which is saved into a file called a \fBdictionary\fR\. Then during compression and decompression, reference the same dictionary, using command \fB\-D dictionaryFileName\fR\. Compression of small files similar to the sample set will be greatly improved\.
.
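.P
For example, \fBzstd \-\-train FILEs \-o mydict\fR creates a dictionary named \fBmydict\fR from the given samples; \fBzstd \-D mydict FILE\fR then compresses with it, and \fBzstd \-D mydict \-d FILE\.zst\fR decompresses with the same dictionary\.
.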
.TP
\fB\-\-train FILEs\fR
Use FILEs as training set to create a dictionary\. The training set should contain a lot of small files (> 100), and weigh typically 100x the target dictionary size (for example, 10 MB for a 100 KB dictionary)\.
.
.IP
Supports multithreading if \fBzstd\fR is compiled with threading support\. Additional parameters can be specified with \fB\-\-train\-fastcover\fR\. The legacy dictionary builder can be accessed with \fB\-\-train\-legacy\fR\. The cover dictionary builder can be accessed with \fB\-\-train\-cover\fR\. \fB\-\-train\fR is equivalent to \fB\-\-train\-fastcover=d=8,steps=4\fR\.
.
.TP
\fB\-o file\fR
Dictionary saved into \fBfile\fR (default name: dictionary)\.
.
.TP
\fB\-\-maxdict=#\fR
Limit dictionary to specified size (default: 112640)\.
.
.TP
\fB\-#\fR
Use \fB#\fR compression level during training (optional)\. Will generate statistics more tuned for selected compression level, resulting in a \fIsmall\fR compression ratio improvement for this level\.
.
.TP
\fB\-B#\fR
Split input files in blocks of size # (default: no split)
.
.TP
\fB\-\-dictID=#\fR
A dictionary ID is a locally unique ID that a decoder can use to verify it is using the right dictionary\. By default, zstd will create a 4\-bytes random number ID\. It\'s possible to give a precise number instead\. Short numbers have an advantage : an ID < 256 will only need 1 byte in the compressed frame header, and an ID < 65536 will only need 2 bytes\. This compares favorably to the 4\-byte default\. However, it\'s up to the dictionary manager to avoid assigning the same ID to two different dictionaries\.
.
.TP
\fB\-\-train\-cover[=k#,d=#,steps=#,split=#,shrink[=#]]\fR
Select parameters for the default dictionary builder algorithm named cover\. If \fId\fR is not specified, then it tries \fId\fR = 6 and \fId\fR = 8\. If \fIk\fR is not specified, then it tries \fIsteps\fR values of \fIk\fR in the range [50, 2000]\. If \fIsteps\fR is not specified, then the default value of 40 is used\. If \fIsplit\fR is not specified or split <= 0, then the default value of 100 is used\. Requires that \fId\fR <= \fIk\fR\. If \fIshrink\fR flag is not used, then the default value for \fIshrinkDict\fR of 0 is used\. If \fIshrink\fR is not specified, then the default value for \fIshrinkDictMaxRegression\fR of 1 is used\.
.
.IP
Selects segments of size \fIk\fR with highest score to put in the dictionary\. The score of a segment is computed by the sum of the frequencies of all the subsegments of size \fId\fR\. Generally \fId\fR should be in the range [6, 8], occasionally up to 16, but the algorithm will run faster with d <= \fI8\fR\. Good values for \fIk\fR vary widely based on the input data, but a safe range is [2 * \fId\fR, 2000]\. If \fIsplit\fR is 100, all input samples are used for both training and testing to find optimal \fId\fR and \fIk\fR to build dictionary\. Supports multithreading if \fBzstd\fR is compiled with threading support\. Having \fIshrink\fR enabled takes a truncated dictionary of minimum size and doubles in size until compression ratio of the truncated dictionary is at most \fIshrinkDictMaxRegression%\fR worse than the compression ratio of the largest dictionary\.
.
.IP
Examples:
.
.IP
\fBzstd \-\-train\-cover FILEs\fR
.
.IP
\fBzstd \-\-train\-cover=k=50,d=8 FILEs\fR
.
.IP
\fBzstd \-\-train\-cover=d=8,steps=500 FILEs\fR
.
.IP
\fBzstd \-\-train\-cover=k=50 FILEs\fR
.
.IP
\fBzstd \-\-train\-cover=k=50,split=60 FILEs\fR
.
.IP
\fBzstd \-\-train\-cover=shrink FILEs\fR
.
.IP
\fBzstd \-\-train\-cover=shrink=2 FILEs\fR
.
.TP
\fB\-\-train\-fastcover[=k#,d=#,f=#,steps=#,split=#,accel=#]\fR
Same as cover but with extra parameters \fIf\fR and \fIaccel\fR and a different default value of \fIsplit\fR\. If \fIsplit\fR is not specified, then it tries \fIsplit\fR = 75\. If \fIf\fR is not specified, then it tries \fIf\fR = 20\. Requires that 0 < \fIf\fR < 32\. If \fIaccel\fR is not specified, then it tries \fIaccel\fR = 1\. Requires that 0 < \fIaccel\fR <= 10\. Requires that \fId\fR = 6 or \fId\fR = 8\.
.
.IP
\fIf\fR is log of size of array that keeps track of frequency of subsegments of size \fId\fR\. The subsegment is hashed to an index in the range [0,2^\fIf\fR \- 1]\. It is possible that 2 different subsegments are hashed to the same index, and they are considered as the same subsegment when computing frequency\. Using a higher \fIf\fR reduces collision but takes longer\.
.
.IP
Examples:
.
.IP
\fBzstd \-\-train\-fastcover FILEs\fR
.
.IP
\fBzstd \-\-train\-fastcover=d=8,f=15,accel=2 FILEs\fR
.
.TP
\fB\-\-train\-legacy[=selectivity=#]\fR
Use legacy dictionary builder algorithm with the given dictionary \fIselectivity\fR (default: 9)\. The smaller the \fIselectivity\fR value, the denser the dictionary, improving its efficiency but reducing its possible maximum size\. \fB\-\-train\-legacy=s=#\fR is also accepted\.
.
.IP
Examples:
.
.IP
\fBzstd \-\-train\-legacy FILEs\fR
.
.IP
\fBzstd \-\-train\-legacy=selectivity=8 FILEs\fR
.
.SH "BENCHMARK"
.
.TP
\fB\-b#\fR
benchmark file(s) using compression level #
.
.TP
\fB\-e#\fR
benchmark file(s) using multiple compression levels, from \fB\-b#\fR to \fB\-e#\fR (inclusive)
.
.TP
\fB\-i#\fR
minimum evaluation time, in seconds (default: 3s), benchmark mode only
.
.TP
\fB\-B#\fR, \fB\-\-block\-size=#\fR
cut file(s) into independent blocks of size # (default: no block)
.
.TP
\fB\-\-priority=rt\fR
set process priority to real\-time
.
.P
\fBOutput Format:\fR CompressionLevel#Filename : InputSize \-> OutputSize (CompressionRatio), CompressionSpeed, DecompressionSpeed
.
.P
\fBMethodology:\fR For both compression and decompression speed, the entire input is compressed/decompressed in\-memory to measure speed\. A run lasts at least 1 sec, so when files are small, they are compressed/decompressed several times per run, in order to improve measurement accuracy\.
.
.SH "ADVANCED COMPRESSION OPTIONS"
.
.SS "\-B#:"
Select the size of each compression job\. This parameter is only available when multi\-threading is enabled\. Each compression job is run in parallel, so this value indirectly impacts the nb of active threads\. Default job size varies depending on compression level (generally \fB4 * windowSize\fR)\. \fB\-B#\fR makes it possible to manually select a custom size\. Note that job size must respect a minimum value which is enforced transparently\. This minimum is either 512 KB, or \fBoverlapSize\fR, whichever is larger\. Different job sizes will lead to (slightly) different compressed frames\.
.
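.P
For example, \fBzstd \-T4 \-B8MiB FILE\fR requests 4 worker threads with a job size of 8 MiB (the integer suffixes described above can be used here)\.
.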
.SS "\-\-zstd[=options]:"
\fBzstd\fR provides 22 predefined compression levels\. The selected or default predefined compression level can be changed with advanced compression options\. The \fIoptions\fR are provided as a comma\-separated list\. You may specify only the options you want to change and the rest will be taken from the selected or default compression level\. The list of available \fIoptions\fR:
.
.TP
\fBstrategy\fR=\fIstrat\fR, \fBstrat\fR=\fIstrat\fR
Specify a strategy used by a match finder\.
.
.IP
There are 9 strategies numbered from 1 to 9, from faster to stronger: 1=ZSTD_fast, 2=ZSTD_dfast, 3=ZSTD_greedy, 4=ZSTD_lazy, 5=ZSTD_lazy2, 6=ZSTD_btlazy2, 7=ZSTD_btopt, 8=ZSTD_btultra, 9=ZSTD_btultra2\.
.
.TP
\fBwindowLog\fR=\fIwlog\fR, \fBwlog\fR=\fIwlog\fR
Specify the maximum number of bits for a match distance\.
.
.IP
A higher number of bits increases the chance to find a match, which usually improves compression ratio\. It also increases memory requirements for the compressor and decompressor\. The minimum \fIwlog\fR is 10 (1 KiB) and the maximum is 30 (1 GiB) on 32\-bit platforms and 31 (2 GiB) on 64\-bit platforms\.
.
.IP
Note: If \fBwindowLog\fR is set to larger than 27, \fB\-\-long=windowLog\fR or \fB\-\-memory=windowSize\fR needs to be passed to the decompressor\.
.
.TP
\fBhashLog\fR=\fIhlog\fR, \fBhlog\fR=\fIhlog\fR
Specify the maximum number of bits for a hash table\.
.
.IP
Bigger hash tables cause fewer collisions, which usually makes compression faster, but they require more memory during compression\.
.
.IP
The minimum \fIhlog\fR is 6 (64 B) and the maximum is 30 (1 GiB)\.
.
.TP
\fBchainLog\fR=\fIclog\fR, \fBclog\fR=\fIclog\fR
Specify the maximum number of bits for a hash chain or a binary tree\.
.
.IP
A higher number of bits increases the chance to find a match, which usually improves compression ratio\. It also slows down compression speed and increases memory requirements for compression\. This option is ignored for the ZSTD_fast strategy\.
.
.IP
The minimum \fIclog\fR is 6 (64 B) and the maximum is 29 (512 MiB) on 32\-bit platforms and 30 (1 GiB) on 64\-bit platforms\.
.
.TP
\fBsearchLog\fR=\fIslog\fR, \fBslog\fR=\fIslog\fR
Specify the maximum number of searches in a hash chain or a binary tree using logarithmic scale\.
.
.IP
More searches increase the chance to find a match, which usually increases compression ratio but decreases compression speed\.
.
.IP
The minimum \fIslog\fR is 1 and the maximum is \'windowLog\' \- 1\.
.
.TP
\fBminMatch\fR=\fImml\fR, \fBmml\fR=\fImml\fR
Specify the minimum searched length of a match in a hash table\.
.
.IP
Larger search lengths usually decrease compression ratio but improve decompression speed\.
.
.IP
The minimum \fImml\fR is 3 and the maximum is 7\.
.
.TP
\fBtargetLength\fR=\fItlen\fR, \fBtlen\fR=\fItlen\fR
The impact of this field varies depending on the selected strategy\.
.
.IP
For ZSTD_btopt, ZSTD_btultra and ZSTD_btultra2, it specifies the minimum match length that causes the match finder to stop searching\. A larger \fBtargetLength\fR usually improves compression ratio but decreases compression speed\. For ZSTD_fast, it triggers ultra\-fast mode when > 0\. The value represents the amount of data skipped between match sampling\. Impact is reversed: a larger \fBtargetLength\fR increases compression speed but decreases compression ratio\.
.
.IP
For all other strategies, this field has no impact\.
.
.IP
The minimum \fItlen\fR is 0 and the maximum is 128 KiB\.
.
.TP
\fBoverlapLog\fR=\fIovlog\fR, \fBovlog\fR=\fIovlog\fR
Determine \fBoverlapSize\fR, the amount of data reloaded from the previous job\. This parameter is only available when multithreading is enabled\. Reloading more data improves compression ratio, but decreases speed\.
.
.IP
The minimum \fIovlog\fR is 0, and the maximum is 9\. 1 means "no overlap", hence completely independent jobs\. 9 means "full overlap", meaning up to \fBwindowSize\fR is reloaded from the previous job\. Reducing \fIovlog\fR by 1 reduces the reloaded amount by a factor of 2\. For example, 8 means "windowSize/2", and 6 means "windowSize/8"\. Value 0 is special and means "default": \fIovlog\fR is automatically determined by \fBzstd\fR; in that case, \fIovlog\fR will range from 6 to 9, depending on the selected \fIstrat\fR\.
.
.TP
\fBldmHashLog\fR=\fIlhlog\fR, \fBlhlog\fR=\fIlhlog\fR
Specify the maximum size for a hash table used for long distance matching\.
.
.IP
This option is ignored unless long distance matching is enabled\.
.
.IP
Bigger hash tables usually improve compression ratio at the expense of more memory during compression and a decrease in compression speed\.
.
.IP
The minimum \fIlhlog\fR is 6 and the maximum is 30 (default: 20)\.
.
.TP
\fBldmMinMatch\fR=\fIlmml\fR, \fBlmml\fR=\fIlmml\fR
Specify the minimum searched length of a match for long distance matching\.
.
.IP
This option is ignored unless long distance matching is enabled\.
.
.IP
Values that are either very large or very small usually decrease compression ratio\.
.
.IP
The minimum \fIlmml\fR is 4 and the maximum is 4096 (default: 64)\.
.
.TP
\fBldmBucketSizeLog\fR=\fIlblog\fR, \fBlblog\fR=\fIlblog\fR
Specify the size of each bucket for the hash table used for long distance matching\.
.
.IP
This option is ignored unless long distance matching is enabled\.
.
.IP
Larger bucket sizes improve collision resolution but decrease compression speed\.
.
.IP
The minimum \fIlblog\fR is 1 and the maximum is 8 (default: 3)\.
.
.TP
\fBldmHashRateLog\fR=\fIlhrlog\fR, \fBlhrlog\fR=\fIlhrlog\fR
Specify the frequency of inserting entries into the long distance matching hash table\.
.
.IP
This option is ignored unless long distance matching is enabled\.
.
.IP
Larger values will improve compression speed\. Deviating far from the default value will likely result in a decrease in compression ratio\.
.
.IP
The default value is \fBwlog \- lhlog\fR\.
.
.SS "Example"
The following parameters set advanced compression options to something similar to predefined level 19 for files bigger than 256 KB:
.
.P
\fB\-\-zstd\fR=wlog=23,clog=23,hlog=22,slog=6,mml=3,tlen=48,strat=6
.
.SH "BUGS"
Report bugs at: https://github\.com/facebook/zstd/issues
.
.SH "AUTHOR"
Yann Collet
@ -0,0 +1,587 @@
zstd(1) -- zstd, zstdmt, unzstd, zstdcat - Compress or decompress .zst files
============================================================================
SYNOPSIS
--------
`zstd` [*OPTIONS*] [-|_INPUT-FILE_] [-o _OUTPUT-FILE_]
`zstdmt` is equivalent to `zstd -T0`
`unzstd` is equivalent to `zstd -d`
`zstdcat` is equivalent to `zstd -dcf`
DESCRIPTION
-----------
`zstd` is a fast lossless compression algorithm and data compression tool,
with command line syntax similar to `gzip (1)` and `xz (1)`.
It is based on the **LZ77** family, with further FSE & huff0 entropy stages.
`zstd` offers highly configurable compression speed,
with fast modes at > 200 MB/s per core,
and strong modes nearing lzma compression ratios.
It also features a very fast decoder, with speeds > 500 MB/s per core.
`zstd` command line syntax is generally similar to gzip,
but features the following differences :
- Source files are preserved by default.
It's possible to remove them automatically by using the `--rm` command.
- When compressing a single file, `zstd` displays progress notifications
and result summary by default.
Use `-q` to turn them off.
- `zstd` does not accept input from console,
but it properly accepts `stdin` when it's not the console.
- `zstd` displays a short help page when the command line is invalid.
Use `-q` to turn it off.
`zstd` compresses or decompresses each _file_ according to the selected
operation mode.
If no _files_ are given or _file_ is `-`, `zstd` reads from standard input
and writes the processed data to standard output.
`zstd` will refuse to write compressed data to standard output
if it is a terminal : it will display an error message and skip the _file_.
Similarly, `zstd` will refuse to read compressed data from standard input
if it is a terminal.
Unless `--stdout` or `-o` is specified, _files_ are written to a new file
whose name is derived from the source _file_ name:
* When compressing, the suffix `.zst` is appended to the source filename to
get the target filename.
* When decompressing, the `.zst` suffix is removed from the source filename to
get the target filename
### Concatenation with .zst files
It is possible to concatenate `.zst` files as is.
`zstd` will decompress such files as if they were a single `.zst` file.
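For example (file names are illustrative), two compressed files can be joined and decoded in one pass:
`cat file1.zst file2.zst > combined.zst`
`zstd -d combined.zst`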
OPTIONS
-------
### Integer suffixes and special values
In most places where an integer argument is expected,
an optional suffix is supported to easily indicate large integers.
There must be no space between the integer and the suffix.
* `KiB`:
Multiply the integer by 1,024 (2\^10).
`Ki`, `K`, and `KB` are accepted as synonyms for `KiB`.
* `MiB`:
Multiply the integer by 1,048,576 (2\^20).
`Mi`, `M`, and `MB` are accepted as synonyms for `MiB`.
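For example (paths are illustrative), `zstd --maxdict=64KiB --train samples/*` limits the dictionary to 65,536 bytes, and `zstd -B1MiB -b3 file` benchmarks with 1,048,576-byte blocks.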
### Operation mode
If multiple operation mode options are given,
the last one takes effect.
* `-z`, `--compress`:
Compress.
This is the default operation mode when no operation mode option is specified
and no other operation mode is implied from the command name
(for example, `unzstd` implies `--decompress`).
* `-d`, `--decompress`, `--uncompress`:
Decompress.
* `-t`, `--test`:
Test the integrity of compressed _files_.
This option is equivalent to `--decompress --stdout` except that the
decompressed data is discarded instead of being written to standard output.
No files are created or removed.
* `-b#`:
Benchmark file(s) using compression level #
* `--train FILEs`:
Use FILEs as a training set to create a dictionary.
The training set should contain a lot of small files (> 100).
* `-l`, `--list`:
Display information related to a zstd compressed file, such as size, ratio, and checksum.
Some of these fields may not be available.
This command can be augmented with the `-v` modifier.
### Operation modifiers
* `-#`:
`#` compression level \[1-19] (default: 3)
* `--ultra`:
unlocks high compression levels 20+ (maximum 22), using a lot more memory.
Note that decompression will also require more memory when using these levels.
* `--fast[=#]`:
switch to ultra-fast compression levels.
If `=#` is not present, it defaults to `1`.
The higher the value, the faster the compression speed,
at the cost of some compression ratio.
This setting overwrites compression level if one was set previously.
Similarly, if a compression level is set after `--fast`, it overrides it.
* `-T#`, `--threads=#`:
Compress using `#` working threads (default: 1).
If `#` is 0, attempt to detect and use the number of physical CPU cores.
In all cases, the number of threads is capped to `ZSTDMT_NBWORKERS_MAX`,
which is either 64 in 32-bit mode, or 256 for 64-bit environments.
This modifier does nothing if `zstd` is compiled without multithread support.
* `--single-thread`:
Does not spawn a thread for compression; uses a single thread for both I/O and compression.
In this mode, compression is serialized with I/O, which is slightly slower.
(This is different from `-T1`, which spawns 1 compression thread in parallel with I/O).
This mode is the only one available when multithread support is disabled.
Single-thread mode features lower memory usage.
Final compressed result is slightly different from `-T1`.
* `--adapt[=min=#,max=#]` :
`zstd` will dynamically adapt compression level to perceived I/O conditions.
Compression level adaptation can be observed live by using command `-v`.
Adaptation can be constrained between supplied `min` and `max` levels.
The feature works when combined with multi-threading and `--long` mode.
It does not work with `--single-thread`.
It sets window size to 8 MB by default (can be changed manually, see `wlog`).
Due to the chaotic nature of dynamic adaptation, compressed result is not reproducible.
_note_ : at the time of this writing, `--adapt` can remain stuck at low speed
when combined with multiple worker threads (>=2).
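For example (file name is illustrative), `zstd -T0 --adapt=min=5,max=12 big_dataset.tar` lets `zstd` move between levels 5 and 12 as I/O conditions change.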
* `--long[=#]`:
enables long distance matching with `#` `windowLog`. If `#` is not
present, it defaults to `27`.
This increases the window size (`windowLog`) and memory usage for both the
compressor and decompressor.
This setting is designed to improve the compression ratio for files with
long matches at a large distance.
Note: If `windowLog` is set to larger than 27, `--long=windowLog` or
`--memory=windowSize` needs to be passed to the decompressor.
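For example (file names are illustrative):
`zstd --long=30 big_backup.tar`
`zstd -d --long=30 big_backup.tar.zst`
Since 30 is larger than 27, the decompressor must also be given `--long=30` (or `--memory=1024MB`).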
* `-D DICT`:
use `DICT` as Dictionary to compress or decompress FILE(s)
* `--patch-from FILE`:
Specify the file to be used as a reference point for zstd's diff engine.
This is effectively dictionary compression with some convenient parameter
selection, namely that windowSize > srcSize.
Note: this option cannot be used together with `-D`
Note: `--long` mode will be automatically activated if chainLog < fileLog
(fileLog being the windowLog required to cover the whole file). You
can also manually force it.
Note: for all levels, you can use --patch-from in --single-thread mode
to improve compression ratio at the cost of speed
Note: for level 19, you can get increased compression ratio at the cost
of speed by specifying `--zstd=targetLength=` to be something large
(for example 4096), and by setting a large `--zstd=chainLog=`
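For example (file names are illustrative):
`zstd --patch-from=old_version.bin new_version.bin -o delta.zst`
`zstd -d --patch-from=old_version.bin delta.zst -o new_version.bin`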
* `--rsyncable` :
`zstd` will periodically synchronize the compression state to make the
compressed file more rsync-friendly. There is a negligible impact to
compression ratio, and the faster compression levels will see a small
compression speed hit.
This feature does not work with `--single-thread`. You probably don't want
to use it with long range mode, since it will decrease the effectiveness of
the synchronization points, but your mileage may vary.
* `-C`, `--[no-]check`:
add integrity check computed from uncompressed data (default: enabled)
* `--[no-]content-size`:
enable / disable whether or not the original size of the file is placed in
the header of the compressed file. The default option is
--content-size (meaning that the original size will be placed in the header).
* `--no-dictID`:
do not store dictionary ID within frame header (dictionary compression).
The decoder will have to rely on implicit knowledge about which dictionary to use,
it won't be able to check if it's correct.
* `-M#`, `--memory=#`:
Set a memory usage limit. By default, Zstandard uses 128 MB for decompression
as the maximum amount of memory the decompressor is allowed to use, but you can
override this manually if need be in either direction (ie. you can increase or
decrease it).
This is also used during compression when using with --patch-from=. In this case,
this parameter overrides the default maximum size allowed for a dictionary (128 MB).
* `--stream-size=#` :
Sets the pledged source size of input coming from a stream. This value must be exact, as it
will be included in the produced frame header. Incorrect stream sizes will cause an error.
This information will be used to better optimize compression parameters, resulting in
better and potentially faster compression, especially for smaller source sizes.
* `--size-hint=#`:
When handling input from a stream, `zstd` must guess how large the source size
will be when optimizing compression parameters. If the stream size is relatively
small, this guess may be a poor one, resulting in a higher compression ratio than
expected. This feature allows for controlling the guess when needed.
Exact guesses result in better compression ratios. Overestimates result in slightly
degraded compression ratios, while underestimates may result in significant degradation.
* `-o FILE`:
save result into `FILE`
* `-f`, `--force`:
disable input and output checks. Allows overwriting existing files, input
from console, output to stdout, operating on links, block devices, etc.
* `-c`, `--stdout`:
force write to standard output, even if it is the console
* `--[no-]sparse`:
enable / disable sparse FS support,
to make files with many zeroes smaller on disk.
Creating sparse files may save disk space and speed up decompression by
reducing the amount of disk I/O.
default: enabled when output is into a file,
and disabled when output is stdout.
This setting overrides the default and can force sparse mode over stdout.
* `--rm`:
remove source file(s) after successful compression or decompression. If used in combination with
-o, will trigger a confirmation prompt (which can be silenced with -f), as this is a destructive operation.
* `-k`, `--keep`:
keep source file(s) after successful compression or decompression.
This is the default behavior.
* `-r`:
operate recursively on directories
* `--filelist FILE`
read a list of files to process as content from `FILE`.
Format is compatible with `ls` output, with one file per line.
* `--output-dir-flat DIR`:
resulting files are stored into target `DIR` directory,
instead of same directory as origin file.
Be aware that this command can introduce name collision issues,
if multiple files, from different directories, end up having the same name.
Collision resolution ensures first file with a given name will be present in `DIR`,
while in combination with `-f`, the last file will be present instead.
* `--output-dir-mirror DIR`:
similar to `--output-dir-flat`,
the output files are stored underneath target `DIR` directory,
but this option will replicate input directory hierarchy into output `DIR`.
If input directory contains "..", the files in this directory will be ignored.
If input directory is an absolute directory (i.e. "/var/tmp/abc"),
it will be stored into the "output-dir/var/tmp/abc".
If there are multiple input files or directories,
name collision resolution will follow the same rules as `--output-dir-flat`.
* `--format=FORMAT`:
compress and decompress in other formats. If compiled with
support, zstd can compress to or decompress from other compression algorithm
formats. Possibly available options are `zstd`, `gzip`, `xz`, `lzma`, and `lz4`.
If no such format is provided, `zstd` is the default.
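For example (file names are illustrative), `zstd --format=gzip file` produces `file.gz`, and `zstd -d --format=lz4 file.lz4` decompresses an lz4 frame, assuming `zstd` was built with the corresponding library support.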
* `-h`/`-H`, `--help`:
display help/long help and exit
* `-V`, `--version`:
display version number and exit.
Advanced : `-vV` also displays supported formats.
`-vvV` also displays POSIX support.
`-q` will only display the version number, suitable for machine reading.
* `-v`, `--verbose`:
verbose mode, display more information
* `-q`, `--quiet`:
suppress warnings, interactivity, and notifications.
specify twice to suppress errors too.
* `--no-progress`:
do not display the progress bar, but keep all other messages.
* `--show-default-cparams`:
Shows the default compression parameters that will be used for a
particular src file. If the provided src file is not a regular file
(e.g. a named pipe), the cli will just output the default parameters.
That is, the parameters that are used when the src size is unknown.
* `--`:
All arguments after `--` are treated as files
### Restricted usage of Environment Variables
Using environment variables to set parameters has security implications.
Therefore, this avenue is intentionally restricted.
Only `ZSTD_CLEVEL` and `ZSTD_NBTHREADS` are currently supported.
They set the compression level and number of threads to use during compression, respectively.
`ZSTD_CLEVEL` can be used to set the level between 1 and 19 (the "normal" range).
If the value of `ZSTD_CLEVEL` is not a valid integer, it will be ignored with a warning message.
`ZSTD_CLEVEL` just replaces the default compression level (`3`).
`ZSTD_NBTHREADS` can be used to set the number of threads `zstd` will attempt to use during compression.
If the value of `ZSTD_NBTHREADS` is not a valid unsigned integer, it will be ignored with a warning message.
`ZSTD_NBTHREADS` has a default value of `1`, and is capped at `ZSTDMT_NBWORKERS_MAX` (64 in 32-bit mode, 256 in 64-bit environments). `zstd` must be
compiled with multithread support for this to have any effect.
They can both be overridden by corresponding command line arguments:
`-#` for compression level and `-T#` for number of compression threads.
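For example (values are illustrative), `ZSTD_CLEVEL=19 ZSTD_NBTHREADS=4 zstd big_file` behaves like `zstd -19 -T4 big_file`, unless `-#` or `-T#` is also given on the command line.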
DICTIONARY BUILDER
------------------
`zstd` offers _dictionary_ compression,
which greatly improves efficiency on small files and messages.
It's possible to train `zstd` with a set of samples,
the result of which is saved into a file called a `dictionary`.
Then during compression and decompression, reference the same dictionary,
using command `-D dictionaryFileName`.
Compression of small files similar to the sample set will be greatly improved.
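A typical workflow looks like the following (paths are illustrative):
`zstd --train samples/* -o my_dict`
`zstd -D my_dict message.json`
`zstd -D my_dict -d message.json.zst`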
* `--train FILEs`:
Use FILEs as training set to create a dictionary.
The training set should contain a lot of small files (> 100),
and weigh typically 100x the target dictionary size
(for example, 10 MB for a 100 KB dictionary).
Supports multithreading if `zstd` is compiled with threading support.
Additional parameters can be specified with `--train-fastcover`.
The legacy dictionary builder can be accessed with `--train-legacy`.
The cover dictionary builder can be accessed with `--train-cover`.
The default `--train` is equivalent to `--train-fastcover=d=8,steps=4`.
* `-o file`:
Dictionary saved into `file` (default name: dictionary).
* `--maxdict=#`:
Limit dictionary to specified size (default: 112640).
* `-#`:
Use `#` compression level during training (optional).
Will generate statistics more tuned for selected compression level,
resulting in a _small_ compression ratio improvement for this level.
* `-B#`:
Split input files into blocks of size # (default: no split)
* `--dictID=#`:
A dictionary ID is a locally unique ID that a decoder can use to verify it is
using the right dictionary.
By default, zstd will create a 4-bytes random number ID.
It's possible to give a precise number instead.
Short numbers have an advantage : an ID < 256 will only need 1 byte in the
compressed frame header, and an ID < 65536 will only need 2 bytes.
This compares favorably to 4 bytes default.
However, it's up to the dictionary manager to not assign twice the same ID to
2 different dictionaries.
* `--train-cover[=k=#,d=#,steps=#,split=#,shrink[=#]]`:
Select parameters for the default dictionary builder algorithm named cover.
If _d_ is not specified, then it tries _d_ = 6 and _d_ = 8.
If _k_ is not specified, then it tries _steps_ values in the range [50, 2000].
If _steps_ is not specified, then the default value of 40 is used.
If _split_ is not specified or split <= 0, then the default value of 100 is used.
Requires that _d_ <= _k_.
If _shrink_ flag is not used, then the default value for _shrinkDict_ of 0 is used.
If _shrink_ is not specified, then the default value for _shrinkDictMaxRegression_ of 1 is used.
Selects segments of size _k_ with highest score to put in the dictionary.
The score of a segment is computed by the sum of the frequencies of all the
subsegments of size _d_.
Generally _d_ should be in the range [6, 8], occasionally up to 16, but the
algorithm will run faster with d <= _8_.
Good values for _k_ vary widely based on the input data, but a safe range is
[2 * _d_, 2000].
If _split_ is 100, all input samples are used for both training and testing
to find optimal _d_ and _k_ to build dictionary.
Supports multithreading if `zstd` is compiled with threading support.
Having _shrink_ enabled takes a truncated dictionary of minimum size and doubles
in size until compression ratio of the truncated dictionary is at most
_shrinkDictMaxRegression%_ worse than the compression ratio of the largest dictionary.
Examples:
`zstd --train-cover FILEs`
`zstd --train-cover=k=50,d=8 FILEs`
`zstd --train-cover=d=8,steps=500 FILEs`
`zstd --train-cover=k=50 FILEs`
`zstd --train-cover=k=50,split=60 FILEs`
`zstd --train-cover=shrink FILEs`
`zstd --train-cover=shrink=2 FILEs`
* `--train-fastcover[=k=#,d=#,f=#,steps=#,split=#,accel=#]`:
Same as cover but with extra parameters _f_ and _accel_ and different default value of split
If _split_ is not specified, then it tries _split_ = 75.
If _f_ is not specified, then it tries _f_ = 20.
Requires that 0 < _f_ < 32.
If _accel_ is not specified, then it tries _accel_ = 1.
Requires that 0 < _accel_ <= 10.
Requires that _d_ = 6 or _d_ = 8.
_f_ is log of size of array that keeps track of frequency of subsegments of size _d_.
The subsegment is hashed to an index in the range [0,2^_f_ - 1].
It is possible that 2 different subsegments are hashed to the same index, and they are considered as the same subsegment when computing frequency.
Using a higher _f_ reduces collision but takes longer.
Examples:
`zstd --train-fastcover FILEs`
`zstd --train-fastcover=d=8,f=15,accel=2 FILEs`
* `--train-legacy[=selectivity=#]`:
Use legacy dictionary builder algorithm with the given dictionary
_selectivity_ (default: 9).
The smaller the _selectivity_ value, the denser the dictionary,
improving its efficiency but reducing its possible maximum size.
`--train-legacy=s=#` is also accepted.
Examples:
`zstd --train-legacy FILEs`
`zstd --train-legacy=selectivity=8 FILEs`
BENCHMARK
---------
* `-b#`:
benchmark file(s) using compression level #
* `-e#`:
benchmark file(s) using multiple compression levels, from `-b#` to `-e#` (inclusive)
* `-i#`:
minimum evaluation time, in seconds (default: 3s), benchmark mode only
* `-B#`, `--block-size=#`:
cut file(s) into independent blocks of size # (default: no block)
* `--priority=rt`:
set process priority to real-time
**Output Format:** CompressionLevel#Filename : InputSize -> OutputSize (CompressionRatio), CompressionSpeed, DecompressionSpeed
**Methodology:** For both compression and decompression speed, the entire input is compressed/decompressed in-memory to measure speed. A run lasts at least 1 sec, so when files are small, they are compressed/decompressed several times per run, in order to improve measurement accuracy.
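For example (file name is illustrative), `zstd -b1 -e19 -i10 corpus.tar` benchmarks levels 1 through 19 on `corpus.tar`, spending at least 10 seconds per measurement.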
ADVANCED COMPRESSION OPTIONS
----------------------------
### -B#:
Select the size of each compression job.
This parameter is only available when multi-threading is enabled.
Each compression job is run in parallel, so this value indirectly impacts the number of active threads.
Default job size varies depending on compression level (generally `4 * windowSize`).
`-B#` makes it possible to manually select a custom size.
Note that job size must respect a minimum value which is enforced transparently.
This minimum is either 512 KB, or `overlapSize`, whichever is largest.
Different job sizes will lead to (slightly) different compressed frames.
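For example (file name is illustrative), `zstd -T4 -B8MiB big_file` compresses with 4 worker threads and 8 MiB jobs, while `zstd -T4 big_file` lets `zstd` pick the default job size for the selected level.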
### --zstd[=options]:
`zstd` provides 22 predefined compression levels.
The selected or default predefined compression level can be changed with
advanced compression options.
The _options_ are provided as a comma-separated list.
You may specify only the options you want to change and the rest will be
taken from the selected or default compression level.
The list of available _options_:
- `strategy`=_strat_, `strat`=_strat_:
Specify a strategy used by a match finder.
There are 9 strategies numbered from 1 to 9, from faster to stronger:
1=ZSTD\_fast, 2=ZSTD\_dfast, 3=ZSTD\_greedy,
4=ZSTD\_lazy, 5=ZSTD\_lazy2, 6=ZSTD\_btlazy2,
7=ZSTD\_btopt, 8=ZSTD\_btultra, 9=ZSTD\_btultra2.
- `windowLog`=_wlog_, `wlog`=_wlog_:
Specify the maximum number of bits for a match distance.
A higher number of bits increases the chance to find a match, which usually
improves compression ratio.
It also increases memory requirements for the compressor and decompressor.
The minimum _wlog_ is 10 (1 KiB) and the maximum is 30 (1 GiB) on 32-bit
platforms and 31 (2 GiB) on 64-bit platforms.
Note: If `windowLog` is set to larger than 27, `--long=windowLog` or
`--memory=windowSize` needs to be passed to the decompressor.
- `hashLog`=_hlog_, `hlog`=_hlog_:
Specify the maximum number of bits for a hash table.
Bigger hash tables cause fewer collisions, which usually makes compression
faster, but requires more memory during compression.
The minimum _hlog_ is 6 (64 B) and the maximum is 30 (1 GiB).
- `chainLog`=_clog_, `clog`=_clog_:
Specify the maximum number of bits for a hash chain or a binary tree.
A higher number of bits increases the chance to find a match, which usually
improves compression ratio.
It also slows down compression speed and increases memory requirements for
compression.
This option is ignored for the ZSTD_fast strategy.
The minimum _clog_ is 6 (64 B) and the maximum is 29 (512 MiB) on 32-bit platforms
and 30 (1 GiB) on 64-bit platforms.
- `searchLog`=_slog_, `slog`=_slog_:
Specify the maximum number of searches in a hash chain or a binary tree
using logarithmic scale.
More searches increase the chance to find a match, which usually increases
compression ratio but decreases compression speed.
The minimum _slog_ is 1 and the maximum is 'windowLog' - 1.
- `minMatch`=_mml_, `mml`=_mml_:
Specify the minimum searched length of a match in a hash table.
Larger search lengths usually decrease compression ratio but improve
decompression speed.
The minimum _mml_ is 3 and the maximum is 7.
- `targetLength`=_tlen_, `tlen`=_tlen_:
The impact of this field varies depending on the selected strategy.
For ZSTD\_btopt, ZSTD\_btultra and ZSTD\_btultra2, it specifies
the minimum match length that causes the match finder to stop searching.
A larger `targetLength` usually improves compression ratio
but decreases compression speed.
For ZSTD\_fast, it triggers ultra-fast mode when > 0.
The value represents the amount of data skipped between match sampling.
Impact is reversed : a larger `targetLength` increases compression speed
but decreases compression ratio.
For all other strategies, this field has no impact.
The minimum _tlen_ is 0 and the maximum is 128 KiB.
- `overlapLog`=_ovlog_, `ovlog`=_ovlog_:
Determine `overlapSize`, amount of data reloaded from previous job.
This parameter is only available when multithreading is enabled.
Reloading more data improves compression ratio, but decreases speed.
The minimum _ovlog_ is 0, and the maximum is 9.
1 means "no overlap", hence completely independent jobs.
9 means "full overlap", meaning up to `windowSize` is reloaded from previous job.
Reducing _ovlog_ by 1 reduces the reloaded amount by a factor 2.
For example, 8 means "windowSize/2", and 6 means "windowSize/8".
Value 0 is special and means "default" : _ovlog_ is automatically determined by `zstd`.
In which case, _ovlog_ will range from 6 to 9, depending on selected _strat_.
- `ldmHashLog`=_lhlog_, `lhlog`=_lhlog_:
Specify the maximum size for a hash table used for long distance matching.
This option is ignored unless long distance matching is enabled.
Bigger hash tables usually improve compression ratio at the expense of more
memory during compression and a decrease in compression speed.
The minimum _lhlog_ is 6 and the maximum is 30 (default: 20).
- `ldmMinMatch`=_lmml_, `lmml`=_lmml_:
Specify the minimum searched length of a match for long distance matching.
This option is ignored unless long distance matching is enabled.
Larger/very small values usually decrease compression ratio.
The minimum _lmml_ is 4 and the maximum is 4096 (default: 64).
- `ldmBucketSizeLog`=_lblog_, `lblog`=_lblog_:
Specify the size of each bucket for the hash table used for long distance
matching.
This option is ignored unless long distance matching is enabled.
Larger bucket sizes improve collision resolution but decrease compression
speed.
The minimum _lblog_ is 1 and the maximum is 8 (default: 3).
- `ldmHashRateLog`=_lhrlog_, `lhrlog`=_lhrlog_:
Specify the frequency of inserting entries into the long distance matching
hash table.
This option is ignored unless long distance matching is enabled.
Larger values will improve compression speed. Deviating far from the
default value will likely result in a decrease in compression ratio.
The default value is `wlog - lhlog`.
### Example
The following parameters set advanced compression options to something
similar to predefined level 19 for files bigger than 256 KB:
`--zstd`=wlog=23,clog=23,hlog=22,slog=6,mml=3,tlen=48,strat=6
BUGS
----
Report bugs at: https://github.com/facebook/zstd/issues
AUTHOR
------
Yann Collet
1427
dependencies/zstd-1.5.0/programs/zstdcli.c vendored Normal file
File diff suppressed because it is too large
@ -0,0 +1,172 @@
/*
* Copyright (c) Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#include "zstdcli_trace.h"
#include <stdio.h>
#include <stdlib.h>
#include "timefn.h"
#include "util.h"
#define ZSTD_STATIC_LINKING_ONLY
#include "../lib/zstd.h"
/* We depend on the trace header to avoid duplicating the ZSTD_trace struct.
* But, we check the version so it is compatible with dynamic linking.
*/
#include "../lib/common/zstd_trace.h"
/* We only use macros from threading.h so it is compatible with dynamic linking */
#include "../lib/common/threading.h"
#if ZSTD_TRACE
static FILE* g_traceFile = NULL;
static int g_mutexInit = 0;
static ZSTD_pthread_mutex_t g_mutex;
static UTIL_time_t g_enableTime = UTIL_TIME_INITIALIZER;
void TRACE_enable(char const* filename)
{
int const writeHeader = !UTIL_isRegularFile(filename);
if (g_traceFile)
fclose(g_traceFile);
g_traceFile = fopen(filename, "a");
if (g_traceFile && writeHeader) {
/* Fields:
* algorithm
* version
* method
* streaming
* level
* workers
* dictionary size
* uncompressed size
* compressed size
* duration nanos
* compression ratio
* speed MB/s
*/
fprintf(g_traceFile, "Algorithm, Version, Method, Mode, Level, Workers, Dictionary Size, Uncompressed Size, Compressed Size, Duration Nanos, Compression Ratio, Speed MB/s\n");
}
g_enableTime = UTIL_getTime();
if (!g_mutexInit) {
if (!ZSTD_pthread_mutex_init(&g_mutex, NULL)) {
g_mutexInit = 1;
} else {
TRACE_finish();
}
}
}
void TRACE_finish(void)
{
if (g_traceFile) {
fclose(g_traceFile);
}
g_traceFile = NULL;
if (g_mutexInit) {
ZSTD_pthread_mutex_destroy(&g_mutex);
g_mutexInit = 0;
}
}
static void TRACE_log(char const* method, PTime duration, ZSTD_Trace const* trace)
{
int level = 0;
int workers = 0;
double const ratio = (double)trace->uncompressedSize / (double)trace->compressedSize;
double const speed = ((double)trace->uncompressedSize * 1000) / (double)duration;
if (trace->params) {
ZSTD_CCtxParams_getParameter(trace->params, ZSTD_c_compressionLevel, &level);
ZSTD_CCtxParams_getParameter(trace->params, ZSTD_c_nbWorkers, &workers);
}
assert(g_traceFile != NULL);
ZSTD_pthread_mutex_lock(&g_mutex);
/* Fields:
* algorithm
* version
* method
* streaming
* level
* workers
* dictionary size
* uncompressed size
* compressed size
* duration nanos
* compression ratio
* speed MB/s
*/
fprintf(g_traceFile,
"zstd, %u, %s, %s, %d, %d, %llu, %llu, %llu, %llu, %.2f, %.2f\n",
trace->version,
method,
trace->streaming ? "streaming" : "single-pass",
level,
workers,
(unsigned long long)trace->dictionarySize,
(unsigned long long)trace->uncompressedSize,
(unsigned long long)trace->compressedSize,
(unsigned long long)duration,
ratio,
speed);
ZSTD_pthread_mutex_unlock(&g_mutex);
}
/**
* These symbols override the weak symbols provided by the library.
*/
ZSTD_TraceCtx ZSTD_trace_compress_begin(ZSTD_CCtx const* cctx)
{
(void)cctx;
if (g_traceFile == NULL)
return 0;
return (ZSTD_TraceCtx)UTIL_clockSpanNano(g_enableTime);
}
void ZSTD_trace_compress_end(ZSTD_TraceCtx ctx, ZSTD_Trace const* trace)
{
PTime const beginNanos = (PTime)ctx;
PTime const endNanos = UTIL_clockSpanNano(g_enableTime);
PTime const durationNanos = endNanos > beginNanos ? endNanos - beginNanos : 0;
assert(g_traceFile != NULL);
assert(trace->version == ZSTD_VERSION_NUMBER); /* CLI version must match. */
TRACE_log("compress", durationNanos, trace);
}
ZSTD_TraceCtx ZSTD_trace_decompress_begin(ZSTD_DCtx const* dctx)
{
(void)dctx;
if (g_traceFile == NULL)
return 0;
return (ZSTD_TraceCtx)UTIL_clockSpanNano(g_enableTime);
}
void ZSTD_trace_decompress_end(ZSTD_TraceCtx ctx, ZSTD_Trace const* trace)
{
PTime const beginNanos = (PTime)ctx;
PTime const endNanos = UTIL_clockSpanNano(g_enableTime);
PTime const durationNanos = endNanos > beginNanos ? endNanos - beginNanos : 0;
assert(g_traceFile != NULL);
assert(trace->version == ZSTD_VERSION_NUMBER); /* CLI version must match. */
TRACE_log("decompress", durationNanos, trace);
}
#else /* ZSTD_TRACE */
void TRACE_enable(char const* filename)
{
(void)filename;
}
void TRACE_finish(void) {}
#endif /* ZSTD_TRACE */
@ -0,0 +1,24 @@
/*
* Copyright (c) Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef ZSTDCLI_TRACE_H
#define ZSTDCLI_TRACE_H
/**
* Enable tracing - log to filename.
*/
void TRACE_enable(char const* filename);
/**
* Shut down the tracing library.
*/
void TRACE_finish(void);
#endif /* ZSTDCLI_TRACE_H */
134
dependencies/zstd-1.5.0/programs/zstdgrep vendored Executable file
@ -0,0 +1,134 @@
#!/bin/sh
#
# Copyright (c) 2003 Thomas Klausner.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
grep=${GREP:-grep}
zcat=${ZCAT:-zstdcat}
endofopts=0
pattern_found=0
grep_args=""
hyphen=0
silent=0
prog=${0##*/}
# handle being called 'zegrep' or 'zfgrep'
case $prog in
*egrep*) prog=zegrep; grep_args='-E';;
*fgrep*) prog=zfgrep; grep_args='-F';;
*) prog=zstdgrep;;
esac
# skip all options and pass them on to grep taking care of options
# with arguments, and if -e was supplied
while [ "$#" -gt 0 ] && [ "${endofopts}" -eq 0 ]; do
case "$1" in
# from GNU grep-2.5.1 -- keep in sync!
-[ABCDXdefm])
if [ "$#" -lt 2 ]; then
printf '%s: missing argument for %s flag\n' "${prog}" "$1" >&2
exit 1
fi
case "$1" in
-e)
pattern="$2"
pattern_found=1
shift 2
break
;;
-f)
pattern_found=2
;;
*)
;;
esac
grep_args="${grep_args} $1 $2"
shift 2
;;
--)
shift
endofopts=1
;;
-)
hyphen=1
shift
;;
-h)
silent=1
shift
;;
-*)
grep_args="${grep_args} $1"
shift
;;
*)
# pattern to grep for
endofopts=1
;;
esac
done
# if no -e option was found, take next argument as grep-pattern
if [ "${pattern_found}" -lt 1 ]; then
if [ "$#" -ge 1 ]; then
pattern="$1"
shift
elif [ "${hyphen}" -gt 0 ]; then
pattern="-"
else
printf '%s: missing pattern\n' "${prog}" >&2
exit 1
fi
fi
EXIT_CODE=0
# call grep ...
if [ "$#" -lt 1 ]; then
# ... on stdin
set -f # Disable file name generation (globbing).
# shellcheck disable=SC2086
"${zcat}" - | "${grep}" ${grep_args} -- "${pattern}" -
EXIT_CODE=$?
set +f
else
# ... on all files given on the command line
if [ "${silent}" -lt 1 ] && [ "$#" -gt 1 ]; then
grep_args="-H ${grep_args}"
fi
set -f
while [ "$#" -gt 0 ]; do
# shellcheck disable=SC2086
if [ $pattern_found -eq 2 ]; then
"${zcat}" -- "$1" | "${grep}" --label="${1}" ${grep_args} -- -
else
"${zcat}" -- "$1" | "${grep}" --label="${1}" ${grep_args} -- "${pattern}" -
fi
[ "$?" -ne 0 ] && EXIT_CODE=1
shift
done
set +f
fi
exit "${EXIT_CODE}"
@ -0,0 +1,23 @@
.
.TH "ZSTDGREP" "1" "May 2021" "zstd 1.5.0" "User Commands"
.
.SH "NAME"
\fBzstdgrep\fR \- print lines matching a pattern in zstandard\-compressed files
.
.SH "SYNOPSIS"
\fBzstdgrep\fR [\fIgrep\-flags\fR] [\-\-] \fIpattern\fR [\fIfiles\fR \.\.\.]
.
.SH "DESCRIPTION"
\fBzstdgrep\fR runs \fBgrep (1)\fR on files or stdin, if no files argument is given, after decompressing them with \fBzstdcat (1)\fR\.
.
.P
The grep\-flags and pattern arguments are passed on to \fBgrep (1)\fR\. If an \fB\-e\fR flag is found in the \fBgrep\-flags\fR, \fBzstdgrep\fR will not look for a pattern argument\.
.
.SH "EXIT STATUS"
In case of missing arguments or missing pattern, 1 will be returned, otherwise 0\.
.
.SH "SEE ALSO"
\fBzstd (1)\fR
.
.SH "AUTHORS"
Thomas Klausner \fIwiz@NetBSD\.org\fR
@ -0,0 +1,26 @@
zstdgrep(1) -- print lines matching a pattern in zstandard-compressed files
============================================================================
SYNOPSIS
--------
`zstdgrep` [*grep-flags*] [--] _pattern_ [_files_ ...]
DESCRIPTION
-----------
`zstdgrep` runs `grep (1)` on files or stdin, if no files argument is given, after decompressing them with `zstdcat (1)`.
The grep-flags and pattern arguments are passed on to `grep (1)`. If an `-e` flag is found in the `grep-flags`, `zstdgrep` will not look for a pattern argument.
EXIT STATUS
-----------
In case of missing arguments or missing pattern, 1 will be returned, otherwise 0.
SEE ALSO
--------
`zstd (1)`
AUTHORS
-------
Thomas Klausner <wiz@NetBSD.org>
2
dependencies/zstd-1.5.0/programs/zstdless vendored Executable file
@ -0,0 +1,2 @@
#!/bin/sh
zstdcat "$@" | less
@ -0,0 +1,14 @@
.
.TH "ZSTDLESS" "1" "May 2021" "zstd 1.5.0" "User Commands"
.
.SH "NAME"
\fBzstdless\fR \- view zstandard\-compressed files
.
.SH "SYNOPSIS"
\fBzstdless\fR [\fIflags\fR] [\fIfile\fR \.\.\.]
.
.SH "DESCRIPTION"
\fBzstdless\fR runs \fBless (1)\fR on files or stdin, if no files argument is given, after decompressing them with \fBzstdcat (1)\fR\.
.
.SH "SEE ALSO"
\fBzstd (1)\fR
@ -0,0 +1,16 @@
zstdless(1) -- view zstandard-compressed files
============================================================================
SYNOPSIS
--------
`zstdless` [*flags*] [_file_ ...]
DESCRIPTION
-----------
`zstdless` runs `less (1)` on files or stdin, if no files argument is given, after decompressing them with `zstdcat (1)`.
SEE ALSO
--------
`zstd (1)`