Bump bundled libarchive version to 3.5.2

- Update bundled libarchive version used on Windows/Mac
- Enable requested zstd support while we are at it. Closes #211
This commit is contained in:
Floris Bos 2021-12-09 12:22:14 +01:00
parent 03e083b4f3
commit 67618a2eac
1869 changed files with 166685 additions and 9489 deletions

View file

@ -0,0 +1,3 @@
## Project Support Notice
The VS2005 Project directory has been moved to the contrib directory in order to indicate that it will no longer be supported.

View file

@ -0,0 +1,440 @@
<?xml version="1.0" encoding="Windows-1252"?>
<VisualStudioProject
ProjectType="Visual C++"
Version="8.00"
Name="fullbench"
ProjectGUID="{CC8F1D1B-BA2F-43E3-A71F-FA415D81AAD3}"
RootNamespace="fullbench"
Keyword="Win32Proj"
>
<Platforms>
<Platform
Name="Win32"
/>
<Platform
Name="x64"
/>
</Platforms>
<ToolFiles>
</ToolFiles>
<Configurations>
<Configuration
Name="Debug|Win32"
OutputDirectory="$(SolutionDir)bin\$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs"
PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="3"
UsePrecompiledHeader="0"
WarningLevel="4"
WarnAsError="true"
DebugInformationFormat="4"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
LinkIncremental="2"
GenerateDebugInformation="true"
SubSystem="1"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Release|Win32"
OutputDirectory="$(SolutionDir)bin\$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
CharacterSet="2"
WholeProgramOptimization="1"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="2"
EnableIntrinsicFunctions="true"
OmitFramePointers="true"
AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs"
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
RuntimeLibrary="0"
EnableFunctionLevelLinking="true"
UsePrecompiledHeader="0"
WarningLevel="4"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
LinkIncremental="1"
GenerateDebugInformation="true"
SubSystem="1"
OptimizeReferences="2"
EnableCOMDATFolding="2"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Debug|x64"
OutputDirectory="$(SolutionDir)bin\$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TargetEnvironment="3"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs"
PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="3"
UsePrecompiledHeader="0"
WarningLevel="4"
WarnAsError="true"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
LinkIncremental="2"
GenerateDebugInformation="true"
SubSystem="1"
TargetMachine="17"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Release|x64"
OutputDirectory="$(SolutionDir)bin\$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
CharacterSet="2"
WholeProgramOptimization="1"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TargetEnvironment="3"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="2"
EnableIntrinsicFunctions="true"
OmitFramePointers="true"
AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs"
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
RuntimeLibrary="0"
EnableFunctionLevelLinking="true"
UsePrecompiledHeader="0"
WarningLevel="4"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
LinkIncremental="1"
GenerateDebugInformation="true"
SubSystem="1"
OptimizeReferences="2"
EnableCOMDATFolding="2"
TargetMachine="17"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
</Configurations>
<References>
</References>
<Files>
<Filter
Name="Source Files"
Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
>
<File
RelativePath="..\..\..\programs\datagen.c"
>
</File>
<File
RelativePath="..\..\..\lib\common\entropy_common.c"
>
</File>
<File
RelativePath="..\..\..\lib\common\error_private.c"
>
</File>
<File
RelativePath="..\..\..\lib\compress\fse_compress.c"
>
</File>
<File
RelativePath="..\..\..\lib\common\fse_decompress.c"
>
</File>
<File
RelativePath="..\..\..\lib\common\xxhash.c"
>
</File>
<File
RelativePath="..\..\..\tests\fullbench.c"
>
</File>
<File
RelativePath="..\..\..\lib\compress\huf_compress.c"
>
</File>
<File
RelativePath="..\..\..\lib\decompress\huf_decompress.c"
>
</File>
<File
RelativePath="..\..\..\lib\common\zstd_common.c"
>
</File>
<File
RelativePath="..\..\..\lib\compress\zstd_compress.c"
>
</File>
<File
RelativePath="..\..\..\lib\decompress\zstd_decompress.c"
>
</File>
</Filter>
<Filter
Name="Header Files"
Filter="h;hpp;hxx;hm;inl;inc;xsd"
UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
>
<File
RelativePath="..\..\..\lib\common\bitstream.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\error_private.h"
>
</File>
<File
RelativePath="..\..\..\lib\zstd_errors.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\fse.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\fse_static.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\huf.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\huf_static.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\xxhash.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\mem.h"
>
</File>
<File
RelativePath="..\..\..\lib\zstd.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\zstd_internal.h"
>
</File>
<File
RelativePath="..\..\..\lib\compress\zstd_opt.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\zstd_static.h"
>
</File>
</Filter>
</Files>
<Globals>
</Globals>
</VisualStudioProject>

View file

@ -0,0 +1,488 @@
<?xml version="1.0" encoding="Windows-1252"?>
<VisualStudioProject
ProjectType="Visual C++"
Version="8.00"
Name="fuzzer"
ProjectGUID="{A62E89D2-9DDE-42BA-8F9B-9DA74889A6B0}"
RootNamespace="fuzzer"
Keyword="Win32Proj"
>
<Platforms>
<Platform
Name="Win32"
/>
<Platform
Name="x64"
/>
</Platforms>
<ToolFiles>
</ToolFiles>
<Configurations>
<Configuration
Name="Debug|Win32"
OutputDirectory="$(SolutionDir)bin\$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(SolutionDir)..\..\programs"
PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="3"
UsePrecompiledHeader="0"
WarningLevel="4"
WarnAsError="true"
DebugInformationFormat="4"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
LinkIncremental="2"
GenerateDebugInformation="true"
SubSystem="1"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Release|Win32"
OutputDirectory="$(SolutionDir)bin\$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
CharacterSet="2"
WholeProgramOptimization="1"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="2"
EnableIntrinsicFunctions="true"
OmitFramePointers="true"
AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(SolutionDir)..\..\programs"
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
RuntimeLibrary="0"
EnableFunctionLevelLinking="true"
UsePrecompiledHeader="0"
WarningLevel="4"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
LinkIncremental="1"
GenerateDebugInformation="true"
SubSystem="1"
OptimizeReferences="2"
EnableCOMDATFolding="2"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Debug|x64"
OutputDirectory="$(SolutionDir)bin\$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TargetEnvironment="3"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(SolutionDir)..\..\programs"
PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="3"
UsePrecompiledHeader="0"
WarningLevel="4"
WarnAsError="true"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
LinkIncremental="2"
GenerateDebugInformation="true"
SubSystem="1"
TargetMachine="17"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Release|x64"
OutputDirectory="$(SolutionDir)bin\$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
CharacterSet="2"
WholeProgramOptimization="1"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TargetEnvironment="3"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="2"
EnableIntrinsicFunctions="true"
OmitFramePointers="true"
AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\dictBuilder;$(SolutionDir)..\..\programs"
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
RuntimeLibrary="0"
EnableFunctionLevelLinking="true"
UsePrecompiledHeader="0"
WarningLevel="4"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
LinkIncremental="1"
GenerateDebugInformation="true"
SubSystem="1"
OptimizeReferences="2"
EnableCOMDATFolding="2"
TargetMachine="17"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
</Configurations>
<References>
</References>
<Files>
<Filter
Name="Source Files"
Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
>
<File
RelativePath="..\..\..\programs\datagen.c"
>
</File>
<File
RelativePath="..\..\..\lib\dictBuilder\cover.c"
>
</File>
<File
RelativePath="..\..\..\lib\dictBuilder\divsufsort.c"
>
</File>
<File
RelativePath="..\..\..\lib\common\entropy_common.c"
>
</File>
<File
RelativePath="..\..\..\lib\common\pool.c"
>
</File>
<File
RelativePath="..\..\..\lib\common\threading.c"
>
</File>
<File
RelativePath="..\..\..\lib\common\error_private.c"
>
</File>
<File
RelativePath="..\..\..\lib\compress\zstdmt_compress.c"
>
</File>
<File
RelativePath="..\..\..\lib\compress\fse_compress.c"
>
</File>
<File
RelativePath="..\..\..\lib\common\fse_decompress.c"
>
</File>
<File
RelativePath="..\..\..\tests\fuzzer.c"
>
</File>
<File
RelativePath="..\..\..\lib\compress\huf_compress.c"
>
</File>
<File
RelativePath="..\..\..\lib\decompress\huf_decompress.c"
>
</File>
<File
RelativePath="..\..\..\lib\common\xxhash.c"
>
</File>
<File
RelativePath="..\..\..\lib\dictBuilder\zdict.c"
>
</File>
<File
RelativePath="..\..\..\lib\common\zstd_common.c"
>
</File>
<File
RelativePath="..\..\..\lib\compress\zstd_compress.c"
>
</File>
<File
RelativePath="..\..\..\lib\decompress\zstd_decompress.c"
>
</File>
</Filter>
<Filter
Name="Header Files"
Filter="h;hpp;hxx;hm;inl;inc;xsd"
UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
>
<File
RelativePath="..\..\..\lib\common\pool.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\threading.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\bitstream.h"
>
</File>
<File
RelativePath="..\..\..\lib\dictBuilder\divsufsort.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\error_private.h"
>
</File>
<File
RelativePath="..\..\..\lib\zstd_errors.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\fse.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\fse_static.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\huf.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\huf_static.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\mem.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\xxhash.h"
>
</File>
<File
RelativePath="..\..\..\lib\zdict.h"
>
</File>
<File
RelativePath="..\..\..\lib\dictBuilder\zdict_static.h"
>
</File>
<File
RelativePath="..\..\..\lib\zstd.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\zstd_internal.h"
>
</File>
<File
RelativePath="..\..\..\lib\compress\zstdmt_compress.h"
>
</File>
<File
RelativePath="..\..\..\lib\compress\zstd_opt.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\zstd_static.h"
>
</File>
</Filter>
</Files>
<Globals>
</Globals>
</VisualStudioProject>

View file

@ -0,0 +1,55 @@
Microsoft Visual Studio Solution File, Format Version 9.00
# Visual C++ Express 2005
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "zstd", "zstd\zstd.vcproj", "{1A2AB08E-5CE7-4C5B-BE55-458157C14051}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "fuzzer", "fuzzer\fuzzer.vcproj", "{A62E89D2-9DDE-42BA-8F9B-9DA74889A6B0}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "fullbench", "fullbench\fullbench.vcproj", "{CC8F1D1B-BA2F-43E3-A71F-FA415D81AAD3}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "zstdlib", "zstdlib\zstdlib.vcproj", "{99DE2A79-7298-4004-A0ED-030D7A3796CA}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Win32 = Debug|Win32
Debug|x64 = Debug|x64
Release|Win32 = Release|Win32
Release|x64 = Release|x64
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{1A2AB08E-5CE7-4C5B-BE55-458157C14051}.Debug|Win32.ActiveCfg = Debug|Win32
{1A2AB08E-5CE7-4C5B-BE55-458157C14051}.Debug|Win32.Build.0 = Debug|Win32
{1A2AB08E-5CE7-4C5B-BE55-458157C14051}.Debug|x64.ActiveCfg = Debug|x64
{1A2AB08E-5CE7-4C5B-BE55-458157C14051}.Debug|x64.Build.0 = Debug|x64
{1A2AB08E-5CE7-4C5B-BE55-458157C14051}.Release|Win32.ActiveCfg = Release|Win32
{1A2AB08E-5CE7-4C5B-BE55-458157C14051}.Release|Win32.Build.0 = Release|Win32
{1A2AB08E-5CE7-4C5B-BE55-458157C14051}.Release|x64.ActiveCfg = Release|x64
{1A2AB08E-5CE7-4C5B-BE55-458157C14051}.Release|x64.Build.0 = Release|x64
{A62E89D2-9DDE-42BA-8F9B-9DA74889A6B0}.Debug|Win32.ActiveCfg = Debug|Win32
{A62E89D2-9DDE-42BA-8F9B-9DA74889A6B0}.Debug|Win32.Build.0 = Debug|Win32
{A62E89D2-9DDE-42BA-8F9B-9DA74889A6B0}.Debug|x64.ActiveCfg = Debug|x64
{A62E89D2-9DDE-42BA-8F9B-9DA74889A6B0}.Debug|x64.Build.0 = Debug|x64
{A62E89D2-9DDE-42BA-8F9B-9DA74889A6B0}.Release|Win32.ActiveCfg = Release|Win32
{A62E89D2-9DDE-42BA-8F9B-9DA74889A6B0}.Release|Win32.Build.0 = Release|Win32
{A62E89D2-9DDE-42BA-8F9B-9DA74889A6B0}.Release|x64.ActiveCfg = Release|x64
{A62E89D2-9DDE-42BA-8F9B-9DA74889A6B0}.Release|x64.Build.0 = Release|x64
{CC8F1D1B-BA2F-43E3-A71F-FA415D81AAD3}.Debug|Win32.ActiveCfg = Debug|Win32
{CC8F1D1B-BA2F-43E3-A71F-FA415D81AAD3}.Debug|Win32.Build.0 = Debug|Win32
{CC8F1D1B-BA2F-43E3-A71F-FA415D81AAD3}.Debug|x64.ActiveCfg = Debug|x64
{CC8F1D1B-BA2F-43E3-A71F-FA415D81AAD3}.Debug|x64.Build.0 = Debug|x64
{CC8F1D1B-BA2F-43E3-A71F-FA415D81AAD3}.Release|Win32.ActiveCfg = Release|Win32
{CC8F1D1B-BA2F-43E3-A71F-FA415D81AAD3}.Release|Win32.Build.0 = Release|Win32
{CC8F1D1B-BA2F-43E3-A71F-FA415D81AAD3}.Release|x64.ActiveCfg = Release|x64
{CC8F1D1B-BA2F-43E3-A71F-FA415D81AAD3}.Release|x64.Build.0 = Release|x64
{99DE2A79-7298-4004-A0ED-030D7A3796CA}.Debug|Win32.ActiveCfg = Debug|Win32
{99DE2A79-7298-4004-A0ED-030D7A3796CA}.Debug|Win32.Build.0 = Debug|Win32
{99DE2A79-7298-4004-A0ED-030D7A3796CA}.Debug|x64.ActiveCfg = Debug|x64
{99DE2A79-7298-4004-A0ED-030D7A3796CA}.Debug|x64.Build.0 = Debug|x64
{99DE2A79-7298-4004-A0ED-030D7A3796CA}.Release|Win32.ActiveCfg = Release|Win32
{99DE2A79-7298-4004-A0ED-030D7A3796CA}.Release|Win32.Build.0 = Release|Win32
{99DE2A79-7298-4004-A0ED-030D7A3796CA}.Release|x64.ActiveCfg = Release|x64
{99DE2A79-7298-4004-A0ED-030D7A3796CA}.Release|x64.Build.0 = Release|x64
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
EndGlobal

View file

@ -0,0 +1,548 @@
<?xml version="1.0" encoding="Windows-1252"?>
<VisualStudioProject
ProjectType="Visual C++"
Version="8.00"
Name="zstd"
ProjectGUID="{1A2AB08E-5CE7-4C5B-BE55-458157C14051}"
RootNamespace="zstd"
Keyword="Win32Proj"
>
<Platforms>
<Platform
Name="Win32"
/>
<Platform
Name="x64"
/>
</Platforms>
<ToolFiles>
</ToolFiles>
<Configurations>
<Configuration
Name="Debug|Win32"
OutputDirectory="$(SolutionDir)bin\$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\dictBuilder;$(SolutionDir)..\..\lib\compress"
PreprocessorDefinitions="ZSTD_MULTITHREAD=1;ZSTD_LEGACY_SUPPORT=4;WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="3"
UsePrecompiledHeader="0"
WarningLevel="4"
WarnAsError="true"
DebugInformationFormat="4"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
AdditionalDependencies="setargv.obj"
LinkIncremental="2"
GenerateDebugInformation="true"
SubSystem="1"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Release|Win32"
OutputDirectory="$(SolutionDir)bin\$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
CharacterSet="2"
WholeProgramOptimization="1"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="2"
EnableIntrinsicFunctions="true"
OmitFramePointers="true"
AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\dictBuilder;$(SolutionDir)..\..\lib\compress"
PreprocessorDefinitions="ZSTD_MULTITHREAD=1;ZSTD_LEGACY_SUPPORT=4;WIN32;NDEBUG;_CONSOLE"
RuntimeLibrary="0"
EnableFunctionLevelLinking="true"
UsePrecompiledHeader="0"
WarningLevel="4"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
AdditionalDependencies="setargv.obj"
LinkIncremental="1"
GenerateDebugInformation="true"
SubSystem="1"
OptimizeReferences="2"
EnableCOMDATFolding="2"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Debug|x64"
OutputDirectory="$(SolutionDir)bin\$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TargetEnvironment="3"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\dictBuilder;$(SolutionDir)..\..\lib\compress"
PreprocessorDefinitions="ZSTD_MULTITHREAD=1;ZSTD_LEGACY_SUPPORT=4;WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="3"
UsePrecompiledHeader="0"
WarningLevel="4"
WarnAsError="true"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
AdditionalDependencies="setargv.obj"
LinkIncremental="2"
GenerateDebugInformation="true"
SubSystem="1"
TargetMachine="17"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Release|x64"
OutputDirectory="$(SolutionDir)bin\$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
CharacterSet="2"
WholeProgramOptimization="1"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TargetEnvironment="3"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="2"
EnableIntrinsicFunctions="true"
OmitFramePointers="true"
AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\lib\dictBuilder;$(SolutionDir)..\..\lib\compress"
PreprocessorDefinitions="ZSTD_MULTITHREAD=1;ZSTD_LEGACY_SUPPORT=4;WIN32;NDEBUG;_CONSOLE"
RuntimeLibrary="0"
EnableFunctionLevelLinking="true"
UsePrecompiledHeader="0"
WarningLevel="4"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
AdditionalDependencies="setargv.obj"
LinkIncremental="1"
GenerateDebugInformation="true"
SubSystem="1"
OptimizeReferences="2"
EnableCOMDATFolding="2"
TargetMachine="17"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
</Configurations>
<References>
</References>
<Files>
<Filter
Name="Source Files"
Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
>
<File
RelativePath="..\..\..\programs\bench.c"
>
</File>
<File
RelativePath="..\..\..\programs\datagen.c"
>
</File>
<File
RelativePath="..\..\..\programs\dibio.c"
>
</File>
<File
RelativePath="..\..\..\lib\dictBuilder\cover.c"
>
</File>
<File
RelativePath="..\..\..\lib\dictBuilder\divsufsort.c"
>
</File>
<File
RelativePath="..\..\..\lib\common\entropy_common.c"
>
</File>
<File
RelativePath="..\..\..\lib\common\error_private.c"
>
</File>
<File
RelativePath="..\..\..\programs\fileio.c"
>
</File>
<File
RelativePath="..\..\..\lib\compress\fse_compress.c"
>
</File>
<File
RelativePath="..\..\..\lib\common\fse_decompress.c"
>
</File>
<File
RelativePath="..\..\..\lib\compress\huf_compress.c"
>
</File>
<File
RelativePath="..\..\..\lib\decompress\huf_decompress.c"
>
</File>
<File
RelativePath="..\..\..\lib\common\xxhash.c"
>
</File>
<File
RelativePath="..\..\..\lib\dictBuilder\zdict.c"
>
</File>
<File
RelativePath="..\..\..\lib\common\zstd_common.c"
>
</File>
<File
RelativePath="..\..\..\lib\compress\zstd_compress.c"
>
</File>
<File
RelativePath="..\..\..\lib\compress\zstdmt_compress.c"
>
</File>
<File
RelativePath="..\..\..\lib\decompress\zstd_decompress.c"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v01.c"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v02.c"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v03.c"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v04.c"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v05.c"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v06.c"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v07.c"
>
</File>
<File
RelativePath="..\..\..\programs\zstdcli.c"
>
</File>
</Filter>
<Filter
Name="Header Files"
Filter="h;hpp;hxx;hm;inl;inc;xsd"
UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
>
<File
RelativePath="..\..\..\lib\common\bitstream.h"
>
</File>
<File
RelativePath="..\..\..\lib\dictBuilder\divsufsort.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\error_private.h"
>
</File>
<File
RelativePath="..\..\..\lib\zstd_errors.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\fse.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\fse_static.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\huf.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\huf_static.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\mem.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\xxhash.h"
>
</File>
<File
RelativePath="..\..\..\lib\zdict.h"
>
</File>
<File
RelativePath="..\..\..\lib\dictBuilder\zdict_static.h"
>
</File>
<File
RelativePath="..\..\..\lib\zstd.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\zstd_internal.h"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_legacy.h"
>
</File>
<File
RelativePath="..\..\..\lib\compress\zstd_opt.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\zstd_static.h"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v01.h"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v02.h"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v03.h"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v04.h"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v05.h"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v06.h"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v07.h"
>
</File>
<File
RelativePath="..\..\..\lib\compress\zstdmt_compress.h"
>
</File>
</Filter>
</Files>
<Globals>
</Globals>
</VisualStudioProject>

View file

@ -0,0 +1,546 @@
<?xml version="1.0" encoding="Windows-1252"?>
<VisualStudioProject
ProjectType="Visual C++"
Version="8.00"
Name="zstdlib"
ProjectGUID="{99DE2A79-7298-4004-A0ED-030D7A3796CA}"
RootNamespace="zstdlib"
Keyword="Win32Proj"
>
<Platforms>
<Platform
Name="Win32"
/>
<Platform
Name="x64"
/>
</Platforms>
<ToolFiles>
</ToolFiles>
<Configurations>
<Configuration
Name="Debug|Win32"
OutputDirectory="$(SolutionDir)bin\$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="2"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\dictBuilder"
PreprocessorDefinitions="ZSTD_DLL_EXPORT=1;ZSTD_MULTITHREAD=1;ZSTD_LEGACY_SUPPORT=4;WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="3"
UsePrecompiledHeader="0"
WarningLevel="4"
WarnAsError="true"
DebugInformationFormat="4"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
LinkIncremental="2"
GenerateDebugInformation="true"
SubSystem="1"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Release|Win32"
OutputDirectory="$(SolutionDir)bin\$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="2"
CharacterSet="2"
WholeProgramOptimization="1"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="2"
EnableIntrinsicFunctions="true"
OmitFramePointers="true"
AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\dictBuilder"
PreprocessorDefinitions="ZSTD_DLL_EXPORT=1;ZSTD_MULTITHREAD=1;ZSTD_LEGACY_SUPPORT=4;WIN32;NDEBUG;_CONSOLE"
RuntimeLibrary="0"
EnableFunctionLevelLinking="true"
UsePrecompiledHeader="0"
WarningLevel="4"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
LinkIncremental="1"
GenerateDebugInformation="true"
SubSystem="1"
OptimizeReferences="2"
EnableCOMDATFolding="2"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Debug|x64"
OutputDirectory="$(SolutionDir)bin\$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="2"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TargetEnvironment="3"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\dictBuilder"
PreprocessorDefinitions="ZSTD_DLL_EXPORT=1;ZSTD_MULTITHREAD=1;ZSTD_LEGACY_SUPPORT=4;WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="3"
UsePrecompiledHeader="0"
WarningLevel="4"
WarnAsError="true"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
LinkIncremental="2"
GenerateDebugInformation="true"
SubSystem="1"
TargetMachine="17"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Release|x64"
OutputDirectory="$(SolutionDir)bin\$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="2"
CharacterSet="2"
WholeProgramOptimization="1"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TargetEnvironment="3"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="2"
EnableIntrinsicFunctions="true"
OmitFramePointers="true"
AdditionalIncludeDirectories="$(SolutionDir)..\..\lib;$(SolutionDir)..\..\lib\common;$(SolutionDir)..\..\lib\legacy;$(SolutionDir)..\..\programs\legacy;$(SolutionDir)..\..\lib\dictBuilder"
PreprocessorDefinitions="ZSTD_DLL_EXPORT=1;ZSTD_MULTITHREAD=1;ZSTD_LEGACY_SUPPORT=4;WIN32;NDEBUG;_CONSOLE"
RuntimeLibrary="0"
EnableFunctionLevelLinking="true"
UsePrecompiledHeader="0"
WarningLevel="4"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
LinkIncremental="1"
GenerateDebugInformation="true"
SubSystem="1"
OptimizeReferences="2"
EnableCOMDATFolding="2"
TargetMachine="17"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
</Configurations>
<References>
</References>
<Files>
<Filter
Name="Source Files"
Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
>
<File
RelativePath="..\..\..\lib\dictBuilder\cover.c"
>
</File>
<File
RelativePath="..\..\..\lib\dictBuilder\divsufsort.c"
>
</File>
<File
RelativePath="..\..\..\lib\common\pool.c"
>
</File>
<File
RelativePath="..\..\..\lib\common\threading.c"
>
</File>
<File
RelativePath="..\..\..\lib\common\entropy_common.c"
>
</File>
<File
RelativePath="..\..\..\lib\common\error_private.c"
>
</File>
<File
RelativePath="..\..\..\lib\compress\fse_compress.c"
>
</File>
<File
RelativePath="..\..\..\lib\common\fse_decompress.c"
>
</File>
<File
RelativePath="..\..\..\lib\compress\huf_compress.c"
>
</File>
<File
RelativePath="..\..\..\lib\decompress\huf_decompress.c"
>
</File>
<File
RelativePath="..\..\..\lib\common\xxhash.c"
>
</File>
<File
RelativePath="..\..\..\lib\dictBuilder\zdict.c"
>
</File>
<File
RelativePath="..\..\..\lib\common\zstd_common.c"
>
</File>
<File
RelativePath="..\..\..\lib\compress\zstdmt_compress.c"
>
</File>
<File
RelativePath="..\..\..\lib\compress\zstd_compress.c"
>
</File>
<File
RelativePath="..\..\..\lib\decompress\zstd_decompress.c"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v01.c"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v02.c"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v03.c"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v04.c"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v05.c"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v06.c"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v07.c"
>
</File>
</Filter>
<Filter
Name="Header Files"
Filter="h;hpp;hxx;hm;inl;inc;xsd"
UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
>
<File
RelativePath="..\..\..\lib\common\bitstream.h"
>
</File>
<File
RelativePath="..\..\..\lib\dictBuilder\divsufsort.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\pool.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\threading.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\error_private.h"
>
</File>
<File
RelativePath="..\..\..\lib\zstd_errors.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\fse.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\fse_static.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\huf.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\huf_static.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\mem.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\xxhash.h"
>
</File>
<File
RelativePath="..\..\..\lib\zdict.h"
>
</File>
<File
RelativePath="..\..\..\lib\dictBuilder\zdict_static.h"
>
</File>
<File
RelativePath="..\..\..\lib\zstd.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\zstd_internal.h"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_legacy.h"
>
</File>
<File
RelativePath="..\..\..\lib\compress\zstd_opt.h"
>
</File>
<File
RelativePath="..\..\..\lib\compress\zstdmt_compress.h"
>
</File>
<File
RelativePath="..\..\..\lib\common\zstd_static.h"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v01.h"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v02.h"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v03.h"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v04.h"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v05.h"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v06.h"
>
</File>
<File
RelativePath="..\..\..\lib\legacy\zstd_v07.h"
>
</File>
</Filter>
<Filter
Name="Resource Files"
Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav"
UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}"
>
</Filter>
</Files>
<Globals>
</Globals>
</VisualStudioProject>

2
dependencies/zstd-1.5.0/contrib/cleanTabs vendored Executable file
View file

@ -0,0 +1,2 @@
#!/usr/bin/env bash
# Replace every tab with a space in the listed C/C++ sources, editing in place.
# The $'...' quoting and {h,c} brace expansion below are bash features, not
# POSIX sh, so the interpreter is requested explicitly.
# NOTE(review): `sed -i ''` is the BSD/macOS spelling of in-place editing;
# confirm before running this with GNU sed.
sed -i '' $'s/\t/ /g' ../lib/**/*.{h,c} ../programs/*.{h,c} ../tests/*.c ./**/*.{h,cpp} ../examples/*.c ../zlibWrapper/*.{h,c}

View file

@ -0,0 +1 @@
check_flipped_bits

View file

@ -0,0 +1,35 @@
# ################################################################
# Copyright (c) 2019-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
# LICENSE file in the root directory of this source tree) and the GPLv2 (found
# in the COPYING file in the root directory of this source tree).
# ################################################################
# Default goal: build the diagnostic tool.
.PHONY: all
all: check_flipped_bits

# Location of the zstd library sources; may be overridden by the caller.
ZSTDLIBDIR ?= ../../lib

CFLAGS ?= -O3
CFLAGS += -I$(ZSTDLIBDIR) -I$(ZSTDLIBDIR)/common -I$(ZSTDLIBDIR)/compress \
          -I$(ZSTDLIBDIR)/decompress
CFLAGS += -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
          -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \
          -Wstrict-prototypes -Wundef \
          -Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \
          -Wredundant-decls -Wmissing-prototypes
CFLAGS += $(DEBUGFLAGS) $(MOREFLAGS)
FLAGS = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS)

# Declared phony so the library's own Makefile always decides whether
# libzstd.a needs rebuilding.
.PHONY: $(ZSTDLIBDIR)/libzstd.a
$(ZSTDLIBDIR)/libzstd.a:
	$(MAKE) -C $(ZSTDLIBDIR) libzstd.a

check_flipped_bits: check_flipped_bits.c $(ZSTDLIBDIR)/libzstd.a
	$(CC) $(FLAGS) $< -o $@$(EXT) $(ZSTDLIBDIR)/libzstd.a

# The binary is written as check_flipped_bits$(EXT), so remove that exact name.
.PHONY: clean
clean:
	rm -f check_flipped_bits$(EXT)

View file

@ -0,0 +1,400 @@
/*
* Copyright (c) 2019-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#define ZSTD_STATIC_LINKING_ONLY
#include "zstd.h"
#include "zstd_errors.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
/* Shared state for one corruption-diagnosis run; populated by init_stuff(). */
typedef struct {
/* Contents of the input file, as loaded by readFile(). */
char *input;
size_t input_size;
/* Working copy of input that perturb_bits()/perturb_bytes() modify in place. */
char *perturbed; /* same size as input */
/* Buffer handed to ZSTD_decompressStream(); sized with ZSTD_DStreamOutSize(). */
char *output;
size_t output_size;
/* Value of the -d option (a single dictionary file), or NULL. */
const char *dict_file_name;
/* Value of the -D option (directory of DICTID.zstd-dict files), or NULL. */
const char *dict_file_dir_name;
/* ID of the dictionary held in ddict; 0 when none was loaded. */
int32_t dict_id;
/* Raw dictionary bytes; ddict is created by reference (ZSTD_dlm_byRef) over this buffer. */
char *dict;
size_t dict_size;
ZSTD_DDict* ddict;
ZSTD_DCtx* dctx;
/* Number of test_decompress() calls that decoded the whole blob without error. */
int success_count;
/* Failure tallies, indexed by the ZSTD error code returned by ZSTD_getErrorCode(). */
int error_counts[ZSTD_error_maxCode];
} stuff_t;
/*
 * Releases every resource owned by *stuff.
 *
 * All members are passed straight to free()/ZSTD_free*(), so members that are
 * still NULL are fine. The struct itself is not freed.
 */
static void free_stuff(stuff_t* stuff) {
    free(stuff->input);
    /* Allocated in init_stuff() alongside input; previously leaked. */
    free(stuff->perturbed);
    free(stuff->output);
    /* The DDict is created by reference over stuff->dict (ZSTD_dlm_byRef),
     * so drop it before the buffer it points into. */
    ZSTD_freeDDict(stuff->ddict);
    free(stuff->dict);
    ZSTD_freeDCtx(stuff->dctx);
}
/* Prints the command-line help to stderr and terminates with exit status 1. */
static void usage(void) {
    static const char *const help_text[] = {
        "check_flipped_bits input_filename [-d dict] [-D dict_dir]\n",
        "\n",
        "Arguments:\n",
        " -d file: path to a dictionary file to use.\n",
        " -D dir : path to a directory, with files containing dictionaries, of the\n"
        " form DICTID.zstd-dict, e.g., 12345.zstd-dict.\n",
    };
    size_t i;

    for (i = 0; i < sizeof(help_text) / sizeof(help_text[0]); i++) {
        fputs(help_text[i], stderr);
    }
    exit(1);
}
/*
 * Writes the run totals to stderr: the number of successful decompressions,
 * then one line per error code that was seen at least once.
 */
static void print_summary(stuff_t* stuff) {
    int code;

    fprintf(stderr, "%9d successful decompressions\n", stuff->success_count);
    for (code = 0; code < ZSTD_error_maxCode; ++code) {
        const int failures = stuff->error_counts[code];
        if (failures == 0) {
            continue;
        }
        fprintf(
            stderr, "%9d failed decompressions with message: %s\n",
            failures, ZSTD_getErrorString(code));
    }
}
/*
 * Loads the whole of `filename` into a freshly malloc'ed buffer.
 *
 * On success returns the buffer (caller frees it) and stores the file length
 * in *size. On any failure prints a diagnostic to stderr and returns NULL.
 * Only regular files are accepted.
 */
static char* readFile(const char* filename, size_t* size) {
    struct stat statbuf;
    FILE* f;
    char *buf;
    size_t bytes_read;

    if (stat(filename, &statbuf) != 0) {
        /* perror() appends ": <strerror(errno)>\n" portably; "%m" is glibc-only. */
        perror("stat failed");
        return NULL;
    }
    /* S_ISREG compares the whole file-type field, so other file types whose
     * mode bits merely include S_IFREG's bit are rejected too. */
    if (!S_ISREG(statbuf.st_mode)) {
        fprintf(stderr, "Input must be regular file\n");
        return NULL;
    }
    *size = (size_t)statbuf.st_size;

    /* Binary mode: the blob is compressed data, not text. */
    f = fopen(filename, "rb");
    if (f == NULL) {
        perror("fopen failed");
        return NULL;
    }

    /* malloc(0) may legitimately return NULL; always request at least one
     * byte so an empty file is not reported as an allocation failure. */
    buf = malloc(*size > 0 ? *size : 1);
    if (buf == NULL) {
        fprintf(stderr, "malloc failed\n");
        fclose(f);
        return NULL;
    }

    bytes_read = fread(buf, 1, *size, f);
    if (bytes_read != *size) {
        fprintf(stderr, "failed to read whole file\n");
        fclose(f);
        free(buf);
        return NULL;
    }

    if (fclose(f) != 0) {
        perror("fclose failed");
        free(buf);
        return NULL;
    }
    return buf;
}
/*
 * Loads a dictionary file and wraps it in a ZSTD_DDict.
 *
 * The DDict is created with ZSTD_dlm_byRef, so it refers to the file buffer
 * rather than copying it: on success *buf/*size describe that buffer and the
 * caller must keep it alive until the DDict is freed, then free it.
 * If dict_id is non-NULL it receives the dictionary's ID.
 *
 * On failure returns NULL with *buf set to NULL (nothing for the caller to
 * release).
 */
static ZSTD_DDict* readDict(const char* filename, char **buf, size_t* size, int32_t* dict_id) {
    ZSTD_DDict* ddict;

    *buf = readFile(filename, size);
    if (*buf == NULL) {
        fprintf(stderr, "Opening dictionary file '%s' failed\n", filename);
        return NULL;
    }

    ddict = ZSTD_createDDict_advanced(*buf, *size, ZSTD_dlm_byRef, ZSTD_dct_auto, ZSTD_defaultCMem);
    if (ddict == NULL) {
        fprintf(stderr, "Failed to create ddict.\n");
        /* No DDict references the buffer, so it would otherwise leak. */
        free(*buf);
        *buf = NULL;
        return NULL;
    }

    if (dict_id != NULL) {
        *dict_id = (int32_t)ZSTD_getDictID_fromDDict(ddict);
    }
    return ddict;
}
/*
 * Loads "<dict_file_dir_name>/<dict_id>.zstd-dict" and returns its DDict.
 *
 * Returns NULL when no dictionary directory was configured, when the file
 * cannot be loaded, or when the ID stored in the file differs from dict_id.
 * On success *buf/*size describe the dictionary bytes, which the returned
 * DDict references; the caller frees the DDict first and then *buf.
 * On a failed load or ID mismatch *buf is released here and set to NULL.
 */
static ZSTD_DDict* readDictByID(stuff_t *stuff, int32_t dict_id, char **buf, size_t* size) {
    if (stuff->dict_file_dir_name == NULL) {
        return NULL;
    } else {
        size_t dir_name_len = strlen(stuff->dict_file_dir_name);
        int dir_needs_separator = 0;
        size_t dict_file_name_alloc_size = dir_name_len + 1 /* '/' */ + 10 /* max 32-bit unsigned len */ + strlen(".zstd-dict") + 1 /* '\0' */;
        char *dict_file_name = malloc(dict_file_name_alloc_size);
        ZSTD_DDict* ddict;
        int32_t read_dict_id;

        if (dict_file_name == NULL) {
            fprintf(stderr, "malloc failed.\n");
            return NULL;
        }

        if (dir_name_len > 0 && stuff->dict_file_dir_name[dir_name_len - 1] != '/') {
            dir_needs_separator = 1;
        }

        /* Dictionary IDs are unsigned on disk; cast so the argument type
         * matches the %u conversion. */
        snprintf(
            dict_file_name,
            dict_file_name_alloc_size,
            "%s%s%u.zstd-dict",
            stuff->dict_file_dir_name,
            dir_needs_separator ? "/" : "",
            (unsigned)dict_id);

        ddict = readDict(dict_file_name, buf, size, &read_dict_id);
        if (ddict == NULL) {
            fprintf(stderr, "Failed to create ddict from '%s'.\n", dict_file_name);
            free(dict_file_name);
            /* Drop any buffer left behind by the failed load. */
            free(*buf);
            *buf = NULL;
            return NULL;
        }

        if (read_dict_id != dict_id) {
            fprintf(stderr, "Read dictID (%u) does not match expected (%u).\n", (unsigned)read_dict_id, (unsigned)dict_id);
            free(dict_file_name);
            /* The DDict only references *buf, so both must be released. */
            ZSTD_freeDDict(ddict);
            free(*buf);
            *buf = NULL;
            return NULL;
        }

        free(dict_file_name);
        return ddict;
    }
}
/*
 * Parses the command line and prepares *stuff for a run.
 *
 * argv[1] is the input blob; an optional "-d file" or "-D dir" pair may
 * follow. Invalid usage prints the help text and exits via usage().
 *
 * Returns 1 on success and 0 on failure. Every member of *stuff is given a
 * defined value before the first possible early return, so free_stuff() may
 * be called on *stuff regardless of the outcome.
 */
static int init_stuff(stuff_t* stuff, int argc, char *argv[]) {
    const char* input_filename;

    if (argc < 2) {
        usage();
    }

    /* Establish a fully defined state up front. */
    stuff->input = NULL;
    stuff->input_size = 0;
    stuff->perturbed = NULL;
    stuff->output = NULL;
    stuff->output_size = 0;
    stuff->dict_file_name = NULL;
    stuff->dict_file_dir_name = NULL;
    stuff->dict_id = 0;
    stuff->dict = NULL;
    stuff->dict_size = 0;
    stuff->ddict = NULL;
    stuff->dctx = NULL;
    stuff->success_count = 0;
    memset(stuff->error_counts, 0, sizeof(stuff->error_counts));

    input_filename = argv[1];
    stuff->input = readFile(input_filename, &stuff->input_size);
    if (stuff->input == NULL) {
        fprintf(stderr, "Failed to read input file.\n");
        return 0;
    }

    stuff->perturbed = malloc(stuff->input_size);
    if (stuff->perturbed == NULL) {
        fprintf(stderr, "malloc failed.\n");
        return 0;
    }
    memcpy(stuff->perturbed, stuff->input, stuff->input_size);

    stuff->output_size = ZSTD_DStreamOutSize();
    stuff->output = malloc(stuff->output_size);
    if (stuff->output == NULL) {
        fprintf(stderr, "malloc failed.\n");
        return 0;
    }

    if (argc > 2) {
        if (!strcmp(argv[2], "-d")) {
            if (argc > 3) {
                stuff->dict_file_name = argv[3];
            } else {
                usage();
            }
        } else
        if (!strcmp(argv[2], "-D")) {
            if (argc > 3) {
                stuff->dict_file_dir_name = argv[3];
            } else {
                usage();
            }
        } else {
            usage();
        }
    }

    if (stuff->dict_file_dir_name) {
        /* With a dictionary directory, preload the dictionary the
         * unmodified frame asks for (if it names one). */
        int32_t dict_id = ZSTD_getDictID_fromFrame(stuff->input, stuff->input_size);
        if (dict_id != 0) {
            stuff->ddict = readDictByID(stuff, dict_id, &stuff->dict, &stuff->dict_size);
            if (stuff->ddict == NULL) {
                fprintf(stderr, "Failed to create cached ddict.\n");
                return 0;
            }
            stuff->dict_id = dict_id;
        }
    } else
    if (stuff->dict_file_name) {
        stuff->ddict = readDict(stuff->dict_file_name, &stuff->dict, &stuff->dict_size, &stuff->dict_id);
        if (stuff->ddict == NULL) {
            fprintf(stderr, "Failed to create ddict from '%s'.\n", stuff->dict_file_name);
            return 0;
        }
    }

    stuff->dctx = ZSTD_createDCtx();
    if (stuff->dctx == NULL) {
        return 0;
    }

    return 1;
}
/*
 * Attempts to stream-decompress stuff->perturbed.
 *
 * If the (possibly corrupted) frame header names a dictionary other than the
 * cached one, that dictionary is looked up via readDictByID() and used for
 * this attempt only. Returns 1 and bumps success_count when the whole input
 * is consumed without error; otherwise bumps the matching error_counts slot
 * and returns 0. An error code outside the known range aborts the program.
 */
static int test_decompress(stuff_t* stuff) {
    ZSTD_inBuffer in = {stuff->perturbed, stuff->input_size, 0};
    ZSTD_outBuffer out = {stuff->output, stuff->output_size, 0};
    ZSTD_DCtx* dctx = stuff->dctx;
    int32_t frame_dict_id = ZSTD_getDictID_fromFrame(in.src, in.size);
    char *override_buf = NULL;
    size_t override_size = 0;
    ZSTD_DDict* override_ddict = NULL;
    int succeeded = 1;

    if (frame_dict_id != 0 && frame_dict_id != stuff->dict_id) {
        override_ddict = readDictByID(stuff, frame_dict_id, &override_buf, &override_size);
    }

    ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only);
    ZSTD_DCtx_refDDict(dctx, override_ddict != NULL ? override_ddict : stuff->ddict);

    while (in.pos != in.size) {
        size_t ret;

        /* The output is discarded, so reuse the buffer on every call. */
        out.pos = 0;
        ret = ZSTD_decompressStream(dctx, &out, &in);
        if (ZSTD_isError(ret)) {
            unsigned int code = ZSTD_getErrorCode(ret);
            if (code >= ZSTD_error_maxCode) {
                fprintf(stderr, "Received unexpected error code!\n");
                exit(1);
            }
            stuff->error_counts[code]++;
            succeeded = 0;
            break;
        }
    }

    if (succeeded) {
        stuff->success_count++;
    }

    /* Single cleanup point for the per-attempt dictionary. */
    if (override_ddict != NULL) {
        ZSTD_freeDDict(override_ddict);
        free(override_buf);
    }
    return succeeded;
}
/*
 * For every byte of the input, flips each of its 8 bits in turn in the
 * perturbed copy and tries to decompress. Any flip that still decompresses is
 * reported on stderr. The byte is restored before moving on. Always returns 1.
 */
static int perturb_bits(stuff_t* stuff) {
    size_t offset;

    for (offset = 0; offset < stuff->input_size; offset++) {
        unsigned char original = stuff->input[offset];
        size_t shift;

        /* Progress indicator every 1000 bytes. */
        if (offset % 1000 == 0) {
            fprintf(stderr, "Perturbing byte %zu / %zu\n", offset, stuff->input_size);
        }

        for (shift = 0; shift < 8; shift++) {
            unsigned char flipped = original ^ (1 << shift);

            stuff->perturbed[offset] = flipped;
            if (!test_decompress(stuff)) {
                continue;
            }
            fprintf(
                stderr,
                "Flipping byte %zu bit %zu (0x%02x -> 0x%02x) "
                "produced a successful decompression!\n",
                offset, shift, original, flipped);
        }

        stuff->perturbed[offset] = original;
    }
    return 1;
}
/*
 * For every byte of the input, substitutes each of the 256 possible values in
 * the perturbed copy and tries to decompress. Any substitution that
 * decompresses is reported on stderr. The byte is restored before moving on.
 * Always returns 1.
 */
static int perturb_bytes(stuff_t* stuff) {
    size_t offset;

    for (offset = 0; offset < stuff->input_size; offset++) {
        unsigned char original = stuff->input[offset];
        size_t candidate;

        /* Progress indicator every 1000 bytes. */
        if (offset % 1000 == 0) {
            fprintf(stderr, "Perturbing byte %zu / %zu\n", offset, stuff->input_size);
        }

        for (candidate = 0; candidate < 256; candidate++) {
            stuff->perturbed[offset] = candidate;
            if (!test_decompress(stuff)) {
                continue;
            }
            fprintf(
                stderr,
                "Changing byte %zu (0x%02x -> 0x%02x) "
                "produced a successful decompression!\n",
                offset, original, (unsigned char)candidate);
        }

        stuff->perturbed[offset] = original;
    }
    return 1;
}
/*
 * Entry point: loads the blob, checks that it really fails to decompress,
 * then tries every single-bit flip and every single-byte substitution and
 * prints a summary.
 *
 * Exit status is 0 after a completed scan and 1 if initialisation fails or
 * the blob already decompresses.
 */
int main(int argc, char* argv[]) {
    /* Zero-initialised so that free_stuff() is safe even when init_stuff()
     * stops part-way through. */
    stuff_t stuff = {0};
    int exit_code = 0;

    if (!init_stuff(&stuff, argc, argv)) {
        fprintf(stderr, "Failed to init.\n");
        exit_code = 1;
    } else if (test_decompress(&stuff)) {
        fprintf(stderr, "Blob already decompresses successfully!\n");
        exit_code = 1;
    } else {
        perturb_bits(&stuff);
        perturb_bytes(&stuff);
        print_summary(&stuff);
    }

    /* Release resources on every path, not only after a full scan. */
    free_stuff(&stuff);
    return exit_code;
}

View file

@ -0,0 +1,20 @@
# Dockerfile
# First image to build the binary
FROM alpine AS builder
RUN apk --no-cache add make gcc libc-dev
COPY . /src
RUN mkdir /pkg && cd /src && make && make DESTDIR=/pkg install
# Second minimal image to only keep the built binary
FROM alpine
# Copy the built files
COPY --from=builder /pkg /
# Copy the license as well; the destination must match the directory created
# on the line above it (it was previously misspelled "licences").
RUN mkdir -p /usr/local/share/licenses/zstd
COPY --from=builder /src/LICENSE /usr/local/share/licenses/zstd/
# Just run `zstd` if no other command is given
CMD ["/usr/local/bin/zstd"]

View file

@ -0,0 +1,20 @@
## Requirement
The `Dockerfile` uses a multi-stage build (`COPY --from=builder`), which requires `docker` version 17.05 or later.
## Installing docker
The official Docker installation guide describes a package repository that provides a sufficiently recent version:
https://docs.docker.com/install/linux/docker-ce/ubuntu/
## How to run
`docker build -t zstd .`
## test
```
echo foo | docker run -i --rm zstd | docker run -i --rm zstd zstdcat
foo
```

View file

@ -0,0 +1,749 @@
#!/usr/bin/env python3
# ################################################################
# Copyright (c) 2021-2021, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
# LICENSE file in the root directory of this source tree) and the GPLv2 (found
# in the COPYING file in the root directory of this source tree).
# You may select, at your option, one of the above-listed licenses.
# ##########################################################################
import argparse
import contextlib
import os
import re
import shutil
import sys
from typing import Optional
# Subdirectories of the source library that Freestanding._copy_source_lib()
# copies into the output library.
INCLUDED_SUBDIRS = ["common", "compress", "decompress"]
# Library-relative paths that Freestanding._copy_file() never copies.
# mem.h and zstd_deps.h are instead supplied by _copy_mem() / _copy_zstd_deps().
SKIPPED_FILES = [
"common/mem.h",
"common/zstd_deps.h",
"common/pool.c",
"common/pool.h",
"common/threading.c",
"common/threading.h",
"common/zstd_trace.c",
"common/zstd_trace.h",
"compress/zstdmt_compress.h",
"compress/zstdmt_compress.c",
]
# Files that Freestanding._copy_file() additionally skips when the
# external_xxhash option is set.
XXHASH_FILES = [
"common/xxhash.c",
"common/xxhash.h",
]
class FileLines(object):
    """
    Holds the lines of a text file in memory.

    `lines` is read eagerly on construction and may be edited in place;
    `write()` saves the current list back to the same path.
    """

    def __init__(self, filename):
        self.filename = filename
        with open(filename, "r") as handle:
            self.lines = handle.readlines()

    def write(self):
        """Overwrite the file with the current contents of `lines`."""
        with open(self.filename, "w") as handle:
            handle.writelines(self.lines)
class PartialPreprocessor(object):
"""
Looks for simple ifdefs and ifndefs and replaces them.
Handles && and ||.
Has fancy logic to handle translating elifs to ifs.
Only looks for macros in the first part of the expression with no
parens.
Does not handle multi-line macros (only looks in first line).
"""
# Builds the macro tables and pre-compiles every regex used by the other methods.
#   defs:     (macro, value) pairs to treat as defined.
#   replaces: (macro, value) pairs; also treated as defined (merged into
#             self._defs below) and looked up separately in _preprocess_once().
#   undefs:   macro names to treat as undefined.
def __init__(self, defs: [(str, Optional[str])], replaces: [(str, str)], undefs: [str]):
# Regex fragments shared by several of the patterns below.
MACRO_GROUP = r"(?P<macro>[a-zA-Z_][a-zA-Z_0-9]*)"
ELIF_GROUP = r"(?P<elif>el)?"
OP_GROUP = r"(?P<op>&&|\|\|)?"
self._defs = {macro:value for macro, value in defs}
self._replaces = {macro:value for macro, value in replaces}
self._defs.update(self._replaces)
self._undefs = set(undefs)
# Directive recognisers, used mainly by _handle_if_block(). Note that
# self._if is a prefix match, so it also matches #ifdef and #ifndef.
self._define = re.compile(r"\s*#\s*define")
self._if = re.compile(r"\s*#\s*if")
self._elif = re.compile(r"\s*#\s*(?P<elif>el)if")
self._else = re.compile(r"\s*#\s*(?P<else>else)")
self._endif = re.compile(r"\s*#\s*endif")
# "#ifdef MACRO" / "#ifndef MACRO"
self._ifdef = re.compile(fr"\s*#\s*if(?P<not>n)?def {MACRO_GROUP}\s*")
# "#if [!]defined(MACRO)" or the #elif form, optionally followed by && or ||
self._if_defined = re.compile(
fr"\s*#\s*{ELIF_GROUP}if\s+(?P<not>!)?\s*defined\s*\(\s*{MACRO_GROUP}\s*\)\s*{OP_GROUP}"
)
# "#if defined(MACRO) && [(] MACRO2 <cmp> <digits> [)]" or the #elif form
self._if_defined_value = re.compile(
fr"\s*#\s*{ELIF_GROUP}if\s+defined\s*\(\s*{MACRO_GROUP}\s*\)\s*"
fr"(?P<op>&&)\s*"
fr"(?P<openp>\()?\s*"
fr"(?P<macro2>[a-zA-Z_][a-zA-Z_0-9]*)\s*"
fr"(?P<cmp>[=><!]+)\s*"
fr"(?P<value>[0-9]*)\s*"
fr"(?P<closep>\))?\s*"
)
# "#if MACRO" or "#elif MACRO", optionally followed by && or ||
self._if_true = re.compile(
fr"\s*#\s*{ELIF_GROUP}if\s+{MACRO_GROUP}\s*{OP_GROUP}"
)
# Comment matchers used by _strip_comments(); the C-style one is non-greedy
# and does not span lines.
self._c_comment = re.compile(r"/\*.*?\*/")
self._cpp_comment = re.compile(r"//")
# Logging hook: forwards all arguments unchanged to print().
def _log(self, *args, **kwargs):
print(*args, **kwargs)
def _strip_comments(self, line):
# First strip c-style comments (may include //)
while True:
m = self._c_comment.search(line)
if m is None:
break
line = line[:m.start()] + line[m.end():]
# Then strip cpp-style comments
m = self._cpp_comment.search(line)
if m is not None:
line = line[:m.start()]
return line
def _fixup_indentation(self, macro, replace: [str]):
if len(replace) == 0:
return replace
if len(replace) == 1 and self._define.match(replace[0]) is None:
# If there is only one line, only replace defines
return replace
all_pound = True
for line in replace:
if not line.startswith('#'):
all_pound = False
if all_pound:
replace = [line[1:] for line in replace]
min_spaces = len(replace[0])
for line in replace:
spaces = 0
for i, c in enumerate(line):
if c != ' ':
# Non-preprocessor line ==> skip the fixup
if not all_pound and c != '#':
return replace
spaces = i
break
min_spaces = min(min_spaces, spaces)
replace = [line[min_spaces:] for line in replace]
if all_pound:
replace = ["#" + line for line in replace]
return replace
# Resolves the conditional that starts at self._inlines[idx], whose condition
# has already been evaluated to `is_true` by the caller.
#   macro:   macro name, used for log output and passed to _fixup_indentation().
#   idx:     index of the #if/#elif line in self._inlines.
#   prepend: list of lines to emit first; it is extended in place.
# Returns (next_idx, replace): the index at which the caller should resume and
# the lines that replace self._inlines[idx:next_idx].
# Raises RuntimeError when the scan runs off the end of the file.
def _handle_if_block(self, macro, idx, is_true, prepend):
"""
Remove the #if or #elif block starting on this line.
"""
# Scanner states:
#   REMOVE_ONE  - the current branch is false; drop it and look for the next one.
#   KEEP_ONE    - the current branch is true; keep its body.
#   REMOVE_REST - a true branch was already kept; drop everything up to #endif.
REMOVE_ONE = 0
KEEP_ONE = 1
REMOVE_REST = 2
if is_true:
state = KEEP_ONE
else:
state = REMOVE_ONE
line = self._inlines[idx]
# is_if distinguishes a block opened by an #if-family line from one entered
# at an #elif; only the former may consume the closing #endif.
is_if = self._if.match(line) is not None
assert is_if or self._elif.match(line) is not None
# Nesting depth of conditionals opened inside the block being resolved.
depth = 0
start_idx = idx
idx += 1
replace = prepend
finished = False
while idx < len(self._inlines):
line = self._inlines[idx]
# Nested if statement
if self._if.match(line):
depth += 1
idx += 1
continue
# We're inside a nested statement
if depth > 0:
if self._endif.match(line):
depth -= 1
idx += 1
continue
# We're at the original depth
# Looking only for an endif.
# We've found a true statement, but haven't
# completely elided the if block, so we just
# remove the remainder.
if state == REMOVE_REST:
if self._endif.match(line):
if is_if:
# Remove the endif because we took the first if
idx += 1
finished = True
break
idx += 1
continue
if state == KEEP_ONE:
# NOTE(review): m is assigned here but not read in this branch.
m = self._elif.match(line)
if self._endif.match(line):
replace += self._inlines[start_idx + 1:idx]
idx += 1
finished = True
break
if self._elif.match(line) or self._else.match(line):
replace += self._inlines[start_idx + 1:idx]
state = REMOVE_REST
idx += 1
continue
if state == REMOVE_ONE:
m = self._elif.match(line)
if m is not None:
if is_if:
idx += 1
# Strip the "el" from "#elif" so the next branch is emitted as an "#if".
b = m.start('elif')
e = m.end('elif')
assert e - b == 2
replace.append(line[:b] + line[e:])
finished = True
break
m = self._else.match(line)
if m is not None:
if is_if:
idx += 1
# Copy lines up to the next #endif line.
while self._endif.match(self._inlines[idx]) is None:
replace.append(self._inlines[idx])
idx += 1
idx += 1
finished = True
break
if self._endif.match(line):
if is_if:
# Remove the endif because no other elifs
idx += 1
finished = True
break
idx += 1
continue
if not finished:
raise RuntimeError("Unterminated if block!")
replace = self._fixup_indentation(macro, replace)
# Log a diff-style summary: one line of context before, the removed lines,
# the replacement lines, and one line of context after.
self._log(f"\tHardwiring {macro}")
if start_idx > 0:
self._log(f"\t\t {self._inlines[start_idx - 1][:-1]}")
for x in range(start_idx, idx):
self._log(f"\t\t- {self._inlines[x][:-1]}")
for line in replace:
self._log(f"\t\t+ {line[:-1]}")
if idx < len(self._inlines):
self._log(f"\t\t {self._inlines[idx][:-1]}")
return idx, replace
# Makes one pass over self._inlines, hardwiring every conditional whose
# leading macro is listed in self._defs or self._undefs.
# Returns (changed, outlines): whether anything was rewritten in this pass,
# and the resulting list of lines. preprocess() repeats the pass until
# `changed` is False.
def _preprocess_once(self):
outlines = []
idx = 0
changed = False
while idx < len(self._inlines):
line = self._inlines[idx]
sline = self._strip_comments(line)
# Try the patterns from most to least specific; if_true records that only
# the bare "#if MACRO" form matched.
m = self._ifdef.fullmatch(sline)
if_true = False
if m is None:
m = self._if_defined_value.fullmatch(sline)
if m is None:
m = self._if_defined.match(sline)
if m is None:
m = self._if_true.match(sline)
if_true = (m is not None)
if m is None:
outlines.append(line)
idx += 1
continue
groups = m.groupdict()
macro = groups['macro']
op = groups.get('op')
# Leave conditionals on macros we were told nothing about untouched.
if not (macro in self._defs or macro in self._undefs):
outlines.append(line)
idx += 1
continue
defined = macro in self._defs
# Needed variables set:
# resolved: Is the statement fully resolved?
# is_true: If resolved, is the statement true?
ifdef = False
if if_true:
# "#if MACRO": only decidable when MACRO is defined to an integer value.
if not defined:
outlines.append(line)
idx += 1
continue
defined_value = self._defs[macro]
is_int = True
try:
defined_value = int(defined_value)
except TypeError:
is_int = False
except ValueError:
is_int = False
resolved = is_int
is_true = (defined_value != 0)
# With a trailing operator the line is only decided when the known operand
# short-circuits it: false for &&, true for ||.
if resolved and op is not None:
if op == '&&':
resolved = not is_true
else:
assert op == '||'
resolved = is_true
else:
# "#ifdef" / "#ifndef" / "[!]defined(MACRO)" forms.
ifdef = groups.get('not') is None
elseif = groups.get('elif') is not None
macro2 = groups.get('macro2')
cmp = groups.get('cmp')
value = groups.get('value')
openp = groups.get('openp')
closep = groups.get('closep')
is_true = (ifdef == defined)
resolved = True
if op is not None:
if op == '&&':
resolved = not is_true
else:
assert op == '||'
resolved = is_true
if macro2 is not None and not resolved:
assert ifdef and defined and op == '&&' and cmp is not None
# If the statement is true, but we have a single value check, then
# check the value.
defined_value = self._defs[macro]
are_ints = True
try:
defined_value = int(defined_value)
value = int(value)
except TypeError:
are_ints = False
except ValueError:
are_ints = False
# Only evaluated when the comparison is against the same macro, the
# parentheses are balanced, and both sides parsed as integers.
if (
macro == macro2 and
((openp is None) == (closep is None)) and
are_ints
):
resolved = True
if cmp == '<':
is_true = defined_value < value
elif cmp == '<=':
is_true = defined_value <= value
elif cmp == '==':
is_true = defined_value == value
elif cmp == '!=':
is_true = defined_value != value
elif cmp == '>=':
is_true = defined_value >= value
elif cmp == '>':
is_true = defined_value > value
else:
resolved = False
if op is not None and not resolved:
# Remove the first op in the line + spaces
if op == '&&':
opre = op
else:
assert op == '||'
opre = r'\|\|'
needle = re.compile(fr"(?P<if>\s*#\s*(el)?if\s+).*?(?P<op>{opre}\s*)")
match = needle.match(line)
assert match is not None
# Keep the "#if " prefix and everything after the first operator.
newline = line[:match.end('if')] + line[match.end('op'):]
self._log(f"\tHardwiring partially resolved {macro}")
self._log(f"\t\t- {line[:-1]}")
self._log(f"\t\t+ {newline[:-1]}")
outlines.append(newline)
idx += 1
# NOTE(review): `changed` is not set on this path even though the line was
# rewritten — confirm that relying on a later full resolution is intended.
continue
# Skip any statements we cannot fully compute
if not resolved:
outlines.append(line)
idx += 1
continue
prepend = []
if macro in self._replaces:
assert not ifdef
assert op is None
# pop() ensures the replacement #define is emitted only once per macro.
value = self._replaces.pop(macro)
prepend = [f"#define {macro} {value}\n"]
idx, replace = self._handle_if_block(macro, idx, is_true, prepend)
outlines += replace
changed = True
return changed, outlines
def preprocess(self, filename):
with open(filename, 'r') as f:
self._inlines = f.readlines()
changed = True
iters = 0
while changed:
iters += 1
changed, outlines = self._preprocess_once()
self._inlines = outlines
with open(filename, 'w') as f:
f.write(''.join(self._inlines))
class Freestanding(object):
# Stores the configuration for building the output library; every argument is
# kept unchanged on a private attribute.
#   zstd_deps / mem:          files copied to common/zstd_deps.h and
#                             common/mem.h in the output library.
#   source_lib / output_lib:  source and destination library directories.
#   external_xxhash:          when true, the files in XXHASH_FILES are not copied.
#   defs / replaces / undefs: passed to PartialPreprocessor by _hardwire_defines().
# NOTE(review): xxh64_state, xxh64_prefix, rewritten_includes, excludes and
# seds are consumed outside the visible part of this file.
def __init__(
self, zstd_deps: str, mem: str, source_lib: str, output_lib: str,
external_xxhash: bool, xxh64_state: Optional[str],
xxh64_prefix: Optional[str], rewritten_includes: [(str, str)],
defs: [(str, Optional[str])], replaces: [(str, str)],
undefs: [str], excludes: [str], seds: [str],
):
self._zstd_deps = zstd_deps
self._mem = mem
self._src_lib = source_lib
self._dst_lib = output_lib
self._external_xxhash = external_xxhash
self._xxh64_state = xxh64_state
self._xxh64_prefix = xxh64_prefix
self._rewritten_includes = rewritten_includes
self._defs = defs
self._replaces = replaces
self._undefs = undefs
self._excludes = excludes
self._seds = seds
def _dst_lib_file_paths(self):
"""
Yields all the file paths in the dst_lib.
"""
for root, dirname, filenames in os.walk(self._dst_lib):
for filename in filenames:
filepath = os.path.join(root, filename)
yield filepath
# Logging hook: forwards all arguments unchanged to print().
def _log(self, *args, **kwargs):
print(*args, **kwargs)
def _copy_file(self, lib_path):
if not (lib_path.endswith(".c") or lib_path.endswith(".h")):
return
if lib_path in SKIPPED_FILES:
self._log(f"\tSkipping file: {lib_path}")
return
if self._external_xxhash and lib_path in XXHASH_FILES:
self._log(f"\tSkipping xxhash file: {lib_path}")
return
src_path = os.path.join(self._src_lib, lib_path)
dst_path = os.path.join(self._dst_lib, lib_path)
self._log(f"\tCopying: {src_path} -> {dst_path}")
shutil.copyfile(src_path, dst_path)
def _copy_source_lib(self):
self._log("Copying source library into output library")
assert os.path.exists(self._src_lib)
os.makedirs(self._dst_lib, exist_ok=True)
self._copy_file("zstd.h")
self._copy_file("zstd_errors.h")
for subdir in INCLUDED_SUBDIRS:
src_dir = os.path.join(self._src_lib, subdir)
dst_dir = os.path.join(self._dst_lib, subdir)
assert os.path.exists(src_dir)
os.makedirs(dst_dir, exist_ok=True)
for filename in os.listdir(src_dir):
lib_path = os.path.join(subdir, filename)
self._copy_file(lib_path)
def _copy_zstd_deps(self):
dst_zstd_deps = os.path.join(self._dst_lib, "common", "zstd_deps.h")
self._log(f"Copying zstd_deps: {self._zstd_deps} -> {dst_zstd_deps}")
shutil.copyfile(self._zstd_deps, dst_zstd_deps)
def _copy_mem(self):
dst_mem = os.path.join(self._dst_lib, "common", "mem.h")
self._log(f"Copying mem: {self._mem} -> {dst_mem}")
shutil.copyfile(self._mem, dst_mem)
def _hardwire_preprocessor(self, name: str, value: Optional[str] = None, undef=False):
"""
If value=None then hardwire that it is defined, but not what the value is.
If undef=True then value must be None.
If value='' then the macro is defined to '' exactly.
"""
assert not (undef and value is not None)
for filepath in self._dst_lib_file_paths():
file = FileLines(filepath)
def _hardwire_defines(self):
self._log("Hardwiring macros")
partial_preprocessor = PartialPreprocessor(self._defs, self._replaces, self._undefs)
for filepath in self._dst_lib_file_paths():
partial_preprocessor.preprocess(filepath)
def _remove_excludes(self):
self._log("Removing excluded sections")
for exclude in self._excludes:
self._log(f"\tRemoving excluded sections for: {exclude}")
begin_re = re.compile(f"BEGIN {exclude}")
end_re = re.compile(f"END {exclude}")
for filepath in self._dst_lib_file_paths():
file = FileLines(filepath)
outlines = []
skipped = []
emit = True
for line in file.lines:
if emit and begin_re.search(line) is not None:
assert end_re.search(line) is None
emit = False
if emit:
outlines.append(line)
else:
skipped.append(line)
if end_re.search(line) is not None:
assert begin_re.search(line) is None
self._log(f"\t\tRemoving excluded section: {exclude}")
for s in skipped:
self._log(f"\t\t\t- {s}")
emit = True
skipped = []
if not emit:
raise RuntimeError("Excluded section unfinished!")
file.lines = outlines
file.write()
def _rewrite_include(self, original, rewritten):
self._log(f"\tRewriting include: {original} -> {rewritten}")
regex = re.compile(f"\\s*#\\s*include\\s*(?P<include>{original})")
for filepath in self._dst_lib_file_paths():
file = FileLines(filepath)
for i, line in enumerate(file.lines):
match = regex.match(line)
if match is None:
continue
s = match.start('include')
e = match.end('include')
file.lines[i] = line[:s] + rewritten + line[e:]
file.write()
def _rewrite_includes(self):
self._log("Rewriting includes")
for original, rewritten in self._rewritten_includes:
self._rewrite_include(original, rewritten)
def _replace_xxh64_prefix(self):
if self._xxh64_prefix is None:
return
self._log(f"Replacing XXH64 prefix with {self._xxh64_prefix}")
replacements = []
if self._xxh64_state is not None:
replacements.append(
(re.compile(r"([^\w]|^)(?P<orig>XXH64_state_t)([^\w]|$)"), self._xxh64_state)
)
if self._xxh64_prefix is not None:
replacements.append(
(re.compile(r"([^\w]|^)(?P<orig>XXH64)[\(_]"), self._xxh64_prefix)
)
for filepath in self._dst_lib_file_paths():
file = FileLines(filepath)
for i, line in enumerate(file.lines):
modified = False
for regex, replacement in replacements:
match = regex.search(line)
while match is not None:
modified = True
b = match.start('orig')
e = match.end('orig')
line = line[:b] + replacement + line[e:]
match = regex.search(line)
if modified:
self._log(f"\t- {file.lines[i][:-1]}")
self._log(f"\t+ {line[:-1]}")
file.lines[i] = line
file.write()
def _parse_sed(self, sed):
assert sed[0] == 's'
delim = sed[1]
match = re.fullmatch(f's{delim}(.+){delim}(.*){delim}(.*)', sed)
assert match is not None
regex = re.compile(match.group(1))
format_str = match.group(2)
is_global = match.group(3) == 'g'
return regex, format_str, is_global
def _process_sed(self, sed):
self._log(f"Processing sed: {sed}")
regex, format_str, is_global = self._parse_sed(sed)
for filepath in self._dst_lib_file_paths():
file = FileLines(filepath)
for i, line in enumerate(file.lines):
modified = False
while True:
match = regex.search(line)
if match is None:
break
replacement = format_str.format(match.groups(''), match.groupdict(''))
b = match.start()
e = match.end()
line = line[:b] + replacement + line[e:]
modified = True
if not is_global:
break
if modified:
self._log(f"\t- {file.lines[i][:-1]}")
self._log(f"\t+ {line[:-1]}")
file.lines[i] = line
file.write()
def _process_seds(self):
self._log("Processing seds")
for sed in self._seds:
self._process_sed(sed)
def go(self):
self._copy_source_lib()
self._copy_zstd_deps()
self._copy_mem()
self._hardwire_defines()
self._remove_excludes()
self._rewrite_includes()
self._replace_xxh64_prefix()
self._process_seds()
def parse_optional_pair(defines: [str]) -> [(str, Optional[str])]:
    """
    Turns each `NAME` into `(NAME, None)` and each `NAME=VALUE` into
    `(NAME, VALUE)`. Raises RuntimeError when an entry holds more than
    one '='.
    """
    pairs = []
    for define in defines:
        key, *values = define.split('=')
        if len(values) > 1:
            raise RuntimeError(f"Bad define: {define}")
        pairs.append((key, values[0] if values else None))
    return pairs
def parse_pair(rewritten_includes: [str]) -> [(str, str)]:
    """
    Splits each `LEFT=RIGHT` entry into `(LEFT, RIGHT)`. Raises
    RuntimeError when an entry does not contain exactly one '='.
    """
    def _split(spec):
        fields = spec.split('=')
        if len(fields) != 2:
            raise RuntimeError(f"Bad rewritten include: {spec}")
        return (fields[0], fields[1])

    return [_split(spec) for spec in rewritten_includes]
def main(name, args):
    """
    Command line entry point.

    Parses `args`, forces ZSTD_MULTITHREAD to be undefined and tracing to
    be disabled, validates that no macro is requested in conflicting ways,
    then runs `Freestanding(...).go()`.

    Raises RuntimeError when a macro is both defined and undefined, both
    replaced and (un)defined, or when --xxh64-prefix / --xxh64-state is
    given without --xxhash.
    """
    parser = argparse.ArgumentParser(prog=name)
    parser.add_argument("--zstd-deps", default="zstd_deps.h", help="Zstd dependencies file")
    parser.add_argument("--mem", default="mem.h", help="Memory module")
    parser.add_argument("--source-lib", default="../../lib", help="Location of the zstd library")
    parser.add_argument("--output-lib", default="./freestanding_lib", help="Where to output the freestanding zstd library")
    parser.add_argument("--xxhash", default=None, help="Alternate external xxhash include e.g. --xxhash='<xxhash.h>'. If set xxhash is not included.")
    parser.add_argument("--xxh64-state", default=None, help="Alternate XXH64 state type (excluding _) e.g. --xxh64-state='struct xxh64_state'")
    parser.add_argument("--xxh64-prefix", default=None, help="Alternate XXH64 function prefix (excluding _) e.g. --xxh64-prefix=xxh64")
    parser.add_argument("--rewrite-include", default=[], dest="rewritten_includes", action="append", help="Rewrite an include REGEX=NEW (e.g. '<stddef\\.h>=<linux/types.h>')")
    parser.add_argument("--sed", default=[], dest="seds", action="append", help="Apply a sed replacement. Format: `s/REGEX/FORMAT/[g]`. REGEX is a Python regex. FORMAT is a Python format string formatted by the regex dict.")
    parser.add_argument("-D", "--define", default=[], dest="defs", action="append", help="Pre-define this macro (can be passed multiple times)")
    parser.add_argument("-U", "--undefine", default=[], dest="undefs", action="append", help="Pre-undefine this macro (can be passed mutliple times)")
    parser.add_argument("-R", "--replace", default=[], dest="replaces", action="append", help="Pre-define this macro and replace the first ifndef block with its definition")
    parser.add_argument("-E", "--exclude", default=[], dest="excludes", action="append", help="Exclude all lines between 'BEGIN <EXCLUDE>' and 'END <EXCLUDE>'")
    args = parser.parse_args(args)

    # Always remove threading
    if "ZSTD_MULTITHREAD" not in args.undefs:
        args.undefs.append("ZSTD_MULTITHREAD")

    args.defs = parse_optional_pair(args.defs)
    for macro, _ in args.defs:
        if macro in args.undefs:
            raise RuntimeError(f"{macro} is both defined and undefined!")

    # Always set tracing to 0
    if "ZSTD_NO_TRACE" not in (arg[0] for arg in args.defs):
        args.defs.append(("ZSTD_NO_TRACE", None))
        args.defs.append(("ZSTD_TRACE", "0"))

    args.replaces = parse_pair(args.replaces)
    # args.defs holds (name, value) tuples, so compare against the names only.
    defined_names = {macro for macro, _ in args.defs}
    for macro, _ in args.replaces:
        if macro in args.undefs or macro in defined_names:
            raise RuntimeError(f"{macro} is both replaced and (un)defined!")

    args.rewritten_includes = parse_pair(args.rewritten_includes)

    external_xxhash = False
    if args.xxhash is not None:
        external_xxhash = True
        args.rewritten_includes.append(('"(\\.\\./common/)?xxhash.h"', args.xxhash))

    if args.xxh64_prefix is not None:
        if not external_xxhash:
            raise RuntimeError("--xxh64-prefix may only be used with --xxhash provided")

    if args.xxh64_state is not None:
        if not external_xxhash:
            raise RuntimeError("--xxh64-state may only be used with --xxhash provided")

    Freestanding(
        args.zstd_deps,
        args.mem,
        args.source_lib,
        args.output_lib,
        external_xxhash,
        args.xxh64_state,
        args.xxh64_prefix,
        args.rewritten_includes,
        args.defs,
        args.replaces,
        args.undefs,
        args.excludes,
        args.seds,
    ).go()
if __name__ == "__main__":
main(sys.argv[0], sys.argv[1:])

View file

@ -0,0 +1,3 @@
# make artefact
gen_html
zstd_manual.html

View file

@ -0,0 +1,51 @@
# ################################################################
# Copyright (c) 2016-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
# LICENSE file in the root directory of this source tree) and the GPLv2 (found
# in the COPYING file in the root directory of this source tree).
# ################################################################
# Compiler flags; MOREFLAGS lets the caller append extra options.
CXXFLAGS ?= -O3
CXXFLAGS += -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow -Wstrict-aliasing=1 -Wswitch-enum -Wno-comment
CXXFLAGS += $(MOREFLAGS)
FLAGS = $(CPPFLAGS) $(CXXFLAGS) $(LDFLAGS)
# Input header and generated manual.
ZSTDAPI = ../../lib/zstd.h
ZSTDMANUAL = ../../doc/zstd_manual.html
# Extract the MAJOR.MINOR.RELEASE version numbers from the #define lines of $(ZSTDAPI).
LIBVER_MAJOR_SCRIPT:=`sed -n '/define ZSTD_VERSION_MAJOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < $(ZSTDAPI)`
LIBVER_MINOR_SCRIPT:=`sed -n '/define ZSTD_VERSION_MINOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < $(ZSTDAPI)`
LIBVER_PATCH_SCRIPT:=`sed -n '/define ZSTD_VERSION_RELEASE/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < $(ZSTDAPI)`
LIBVER_SCRIPT:= $(LIBVER_MAJOR_SCRIPT).$(LIBVER_MINOR_SCRIPT).$(LIBVER_PATCH_SCRIPT)
LIBVER := $(shell echo $(LIBVER_SCRIPT))
# Define *.exe as extension for Windows systems
ifneq (,$(filter Windows%,$(OS)))
EXT =.exe
else
EXT =
endif
# Default goal: build the generator only.
.PHONY: default
default: gen_html
# all: build the generator and regenerate the manual.
.PHONY: all
all: manual
gen_html: gen_html.cpp
$(CXX) $(FLAGS) $^ -o $@$(EXT)
# Regenerate the manual whenever the generator or the header changes.
$(ZSTDMANUAL): gen_html $(ZSTDAPI)
echo "Update zstd manual in /doc"
./gen_html $(LIBVER) $(ZSTDAPI) $(ZSTDMANUAL)
.PHONY: manual
manual: gen_html $(ZSTDMANUAL)
# clean: remove the generator binary (the manual itself is kept).
.PHONY: clean
clean:
@$(RM) gen_html$(EXT)
@echo Cleaning completed

View file

@ -0,0 +1,31 @@
gen_html - a program for automatic generation of zstd manual
============================================================
#### Introduction
This simple C++ program generates a single-page HTML manual from `zstd.h`.
The recognized comment block formats are as follows:
- comments of type `/*!` mean: this is a function declaration; switch comments with declarations
- comments of type `/**` and `/*-` mean: this is a comment; use a `<H2>` header for the first line
- comments of type `/*=` and `/**=` mean: use a `<H3>` header and show also all functions until first empty line
- comments of type `/*X` where `X` is different from above-mentioned are ignored
Moreover:
- `ZSTDLIB_API` is removed to improve readability
- `typedef` are detected and included even if uncommented
- comments of type `/**<` and `/*!<` are detected and only function declaration is highlighted (bold)
#### Usage
The program requires 3 parameters:
```
gen_html [zstd_version] [input_file] [output_html]
```
To compile the program and generate the zstd manual, we used:
```
make
./gen_html.exe 1.1.1 ../../lib/zstd.h zstd_manual.html
```

View file

@ -0,0 +1,9 @@
#!/bin/sh
# Regenerates ./zstd_manual.html from ../../lib/zstd.h using ./gen_html.

# Prints the number found on the `define ZSTD_VERSION_<field>` line of zstd.h.
version_field() {
    sed -n "/define ZSTD_VERSION_$1/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p" < ../../lib/zstd.h
}

VERSION_MAJOR=$(version_field MAJOR)
VERSION_MINOR=$(version_field MINOR)
VERSION_PATCH=$(version_field RELEASE)
LIBVER_SCRIPT=$VERSION_MAJOR.$VERSION_MINOR.$VERSION_PATCH
echo ZSTD_VERSION=$LIBVER_SCRIPT
./gen_html $LIBVER_SCRIPT ../../lib/zstd.h ./zstd_manual.html

View file

@ -0,0 +1,224 @@
/*
* Copyright (c) 2016-present, Przemyslaw Skibinski, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
*/
#include <iostream>
#include <fstream>
#include <sstream>
#include <vector>
using namespace std;
/* strip every leading and trailing character of `s` that belongs to `characters`;
 * a string made only of such characters becomes empty */
void trim(string& s, string characters)
{
    s.erase(0, s.find_first_not_of(characters));
    const size_t last = s.find_last_not_of(characters);
    if (last != string::npos) {
        s.erase(last + 1);
    }
}
/* reduce `s` to the body of its first C style comment :
 * the opening marker and the character following it are dropped
 * (3 characters, as in "/" "*" "!"), as is everything from the closing marker on.
 * `s` is left untouched when it holds no comment opening;
 * a missing closing marker keeps everything up to the end of the string. */
void trim_comments(string &s)
{
    const size_t spos = s.find("/*");
    if (spos == string::npos) return;   /* no comment : nothing to trim */

    const size_t start = spos + 3;
    if (start >= s.size()) {            /* nothing after the opening marker */
        s.clear();
        return;
    }
    const size_t epos = s.find("*/", start);
    s = (epos == string::npos) ? s.substr(start) : s.substr(start, epos - start);
}
/* collect lines of `input`, starting at `linenum` :
 * - non-empty `terminator` : stop after the first line containing it (that line is included,
 *   and `linenum` is left on it);
 * - empty `terminator` : stop at the first empty line (not included),
 *   and leave `linenum` on the line just before it.
 * When the end of `input` is reached, `linenum` equals input.size(). */
vector<string> get_lines(vector<string>& input, int& linenum, string terminator)
{
    vector<string> out;
    const bool untilBlankLine = terminator.empty();

    for ( ; (size_t)linenum < input.size(); linenum++) {
        const string current = input[linenum];
        if (untilBlankLine) {
            if (current.empty()) { linenum--; break; }
            out.push_back(current);
            continue;
        }
        out.push_back(current);
        if (current.find(terminator) != string::npos) break;
    }
    return out;
}
/* write `line` to `sout`, followed by a newline :
 * a leading "ZSTDLIB_API " is dropped, and any comment found on the line
 * is wrapped in </b> ... <b> so that it is not rendered bold */
void print_line(stringstream &sout, string line)
{
    const string apiPrefix = "ZSTDLIB_API ";
    if (line.compare(0, apiPrefix.size(), apiPrefix) == 0) {
        line.erase(0, apiPrefix.size());
    }

    const size_t commentPos = line.find("/*");
    if (commentPos == string::npos) {
        sout << line << endl;
        return;
    }
    sout << line.substr(0, commentPos)
         << "</b>" << line.substr(commentPos) << "<b>" << endl;
}
/* gen_html [zstd_version] [input_file] [output_html]
 * Reads the header `input_file`, turns its specially marked comment blocks
 * and the declarations following them into HTML, and writes a single page
 * (title, table of contents, body) to `output_html`.
 * @return : 0 on success, 1 on bad usage or when a file cannot be opened */
int main(int argc, char *argv[]) {
    char exclam;
    int linenum, chapter = 1;
    vector<string> input, lines, comments, chapters;
    string line, version;
    size_t spos, l;
    stringstream sout;
    ifstream istream;
    ofstream ostream;

    if (argc < 4) {
        cout << "usage: " << argv[0] << " [zstd_version] [input_file] [output_html]" << endl;
        return 1;
    }

    version = "zstd " + string(argv[1]) + " Manual";

    istream.open(argv[2], ifstream::in);
    if (!istream.is_open()) {
        cout << "Error opening file " << argv[2] << endl;
        return 1;
    }

    ostream.open(argv[3], ifstream::out);
    if (!ostream.is_open()) {
        cout << "Error opening file " << argv[3] << endl;
        return 1;
    }

    while (getline(istream, line)) {
        input.push_back(line);
    }

    for (linenum=0; (size_t)linenum < input.size(); linenum++) {
        line = input[linenum];

        /* typedefs are detected and included even if uncommented */
        if (line.substr(0,7) == "typedef" && line.find("{")!=string::npos) {
            lines = get_lines(input, linenum, "}");
            sout << "<pre><b>";
            for (l=0; l<lines.size(); l++) {
                print_line(sout, lines[l]);
            }
            sout << "</b></pre><BR>" << endl;
            continue;
        }

        /* comments of type /**< and /*!< are detected and only function declaration is highlighted (bold) */
        if ((line.find("/**<")!=string::npos || line.find("/*!<")!=string::npos) && line.find("*/")!=string::npos) {
            sout << "<pre><b>";
            print_line(sout, line);
            sout << "</b></pre><BR>" << endl;
            continue;
        }

        /* find the comment marker; `exclam` keeps the character that selects the block type */
        spos = line.find("/**=");
        if (spos==string::npos) {
            spos = line.find("/*!");
            if (spos==string::npos)
                spos = line.find("/**");
            if (spos==string::npos)
                spos = line.find("/*-");
            if (spos==string::npos)
                spos = line.find("/*=");
            if (spos==string::npos)
                continue;
            exclam = line[spos+2];
        }
        else exclam = '=';

        comments = get_lines(input, linenum, "*/");
        if (!comments.empty()) comments[0] = line.substr(spos+3);
        if (!comments.empty()) comments[comments.size()-1] = comments[comments.size()-1].substr(0, comments[comments.size()-1].find("*/"));
        for (l=0; l<comments.size(); l++) {
            /* drop the " *" or "  *" decoration at the start of continuation lines */
            if (comments[l].find(" *")==0) comments[l] = comments[l].substr(2);
            else if (comments[l].find("  *")==0) comments[l] = comments[l].substr(3);
            trim(comments[l], "*-=");
        }
        while (!comments.empty() && comments[comments.size()-1].empty()) comments.pop_back(); // remove empty line at the end
        while (!comments.empty() && comments[0].empty()) comments.erase(comments.begin()); // remove empty line at the start

        /* comments of type /*! mean: this is a function declaration; switch comments with declarations */
        if (exclam == '!') {
            if (!comments.empty()) comments.erase(comments.begin()); /* remove first line like "ZSTD_XXX() :" */
            linenum++;
            lines = get_lines(input, linenum, "");

            sout << "<pre><b>";
            for (l=0; l<lines.size(); l++) {
                string fline = lines[l];
                if (fline.substr(0, 12) == "ZSTDLIB_API " ||
                    fline.substr(0, 12) == string(12, ' '))
                    fline = fline.substr(12);
                print_line(sout, fline);
            }
            sout << "</b><p>";
            for (l=0; l<comments.size(); l++) {
                print_line(sout, comments[l]);
            }
            sout << "</p></pre><BR>" << endl << endl;
        } else if (exclam == '=') { /* comments of type /*= and /**= mean: use a <H3> header and show also all functions until first empty line */
            /* the comment may be empty once trimmed : do not index into an empty vector */
            string title;
            if (!comments.empty()) {
                title = comments[0];
                trim(title, " ");
            }
            sout << "<h3>" << title << "</h3><pre>";
            for (l=1; l<comments.size(); l++) {
                print_line(sout, comments[l]);
            }
            sout << "</pre><b><pre>";
            lines = get_lines(input, ++linenum, "");
            for (l=0; l<lines.size(); l++) {
                print_line(sout, lines[l]);
            }
            sout << "</pre></b><BR>" << endl;
        } else { /* comments of type /** and /*- mean: this is a comment; use a <H2> header for the first line */
            if (comments.empty()) continue;

            trim(comments[0], " ");
            sout << "<a name=\"Chapter" << chapter << "\"></a><h2>" << comments[0] << "</h2><pre>";
            chapters.push_back(comments[0]);
            chapter++;

            for (l=1; l<comments.size(); l++) {
                print_line(sout, comments[l]);
            }
            if (comments.size() > 1)
                sout << "<BR></pre>" << endl << endl;
            else
                sout << "</pre>" << endl << endl;
        }
    }

    ostream << "<html>\n<head>\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=ISO-8859-1\">\n<title>" << version << "</title>\n</head>\n<body>" << endl;
    ostream << "<h1>" << version << "</h1>\n";

    ostream << "<hr>\n<a name=\"Contents\"></a><h2>Contents</h2>\n<ol>\n";
    for (size_t i=0; i<chapters.size(); i++)
        ostream << "<li><a href=\"#Chapter" << i+1 << "\">" << chapters[i].c_str() << "</a></li>\n";
    ostream << "</ol>\n<hr>\n";

    ostream << sout.str();
    /* close <body> before <html> so the document is well nested */
    ostream << "</body>" << endl << "</html>" << endl;

    return 0;
}

View file

@ -0,0 +1,2 @@
# build artifacts
largeNbDicts

View file

@ -0,0 +1,58 @@
# ################################################################
# Copyright (c) 2018-present, Yann Collet, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
# LICENSE file in the root directory of this source tree) and the GPLv2 (found
# in the COPYING file in the root directory of this source tree).
# ################################################################
# Locations of the zstd programs and library sources, relative to this directory.
PROGDIR = ../../programs
LIBDIR = ../../lib
LIBZSTD = $(LIBDIR)/libzstd.a
CPPFLAGS+= -I$(LIBDIR) -I$(LIBDIR)/common -I$(LIBDIR)/dictBuilder -I$(PROGDIR)
CFLAGS ?= -O3
CFLAGS += -std=gnu99
DEBUGFLAGS= -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
-Wstrict-aliasing=1 -Wswitch-enum \
-Wstrict-prototypes -Wundef -Wpointer-arith \
-Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \
-Wredundant-decls
CFLAGS += $(DEBUGFLAGS) $(MOREFLAGS)
default: largeNbDicts
all : largeNbDicts
# The benchmark links the helper objects below with the static zstd library.
largeNbDicts: util.o timefn.o benchfn.o datagen.o xxhash.o largeNbDicts.c $(LIBZSTD)
$(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@
# Declared phony so the library Makefile is always consulted.
.PHONY: $(LIBZSTD)
$(LIBZSTD):
$(MAKE) -C $(LIBDIR) libzstd.a CFLAGS="$(CFLAGS)"
# Helper objects compiled from the zstd programs/ and lib/common sources.
benchfn.o: $(PROGDIR)/benchfn.c
$(CC) $(CPPFLAGS) $(CFLAGS) $^ -c
timefn.o: $(PROGDIR)/timefn.c
$(CC) $(CPPFLAGS) $(CFLAGS) $^ -c
datagen.o: $(PROGDIR)/datagen.c
$(CC) $(CPPFLAGS) $(CFLAGS) $^ -c
util.o: $(PROGDIR)/util.c
$(CC) $(CPPFLAGS) $(CFLAGS) $^ -c
xxhash.o : $(LIBDIR)/common/xxhash.c
$(CC) $(CPPFLAGS) $(CFLAGS) $^ -c
# clean: remove local objects and the binary, and clean the library directory too.
clean:
$(RM) *.o
$(MAKE) -C $(LIBDIR) clean > /dev/null
$(RM) largeNbDicts

View file

@ -0,0 +1,25 @@
largeNbDicts
=====================
`largeNbDicts` is a benchmark test tool
dedicated to the specific scenario of
dictionary decompression using a very large number of dictionaries.
When dictionaries are constantly changing, they are always "cold",
suffering from increased latency due to cache misses.
The tool is created in a bid to investigate performance for this scenario,
and experiment with mitigation techniques.
Command line :
```
largeNbDicts [Options] filename(s)
Options :
-r : recursively load all files in subdirectories (default: off)
-B# : split input into blocks of size # (default: no split)
-# : use compression level # (default: 3)
-D # : use # as a dictionary (default: create one)
-i# : nb benchmark rounds (default: 6)
--nbDicts=# : set nb of dictionaries to # (default: one per block)
-h : help (this text)
```

View file

@ -0,0 +1,998 @@
/*
* Copyright (c) 2018-present, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
/* largeNbDicts
* This is a benchmark test tool
* dedicated to the specific case of dictionary decompression
* using a very large nb of dictionaries
* thus suffering latency from lots of cache misses.
* It's created in a bid to investigate performance and find optimizations. */
/*--- Dependencies ---*/
#include <stddef.h> /* size_t */
#include <stdlib.h> /* malloc, free, abort */
#include <stdio.h> /* fprintf */
#include <limits.h> /* UINT_MAX */
#include <assert.h> /* assert */
#include "util.h"
#include "benchfn.h"
#define ZSTD_STATIC_LINKING_ONLY
#include "zstd.h"
#include "zdict.h"
/*--- Constants --- */
#define KB *(1<<10)
#define MB *(1<<20)
#define BLOCKSIZE_DEFAULT 0 /* no slicing into blocks */
#define DICTSIZE (4 KB)
#define CLEVEL_DEFAULT 3
#define BENCH_TIME_DEFAULT_S 6
#define RUN_TIME_DEFAULT_MS 1000
#define BENCH_TIME_DEFAULT_MS (BENCH_TIME_DEFAULT_S * RUN_TIME_DEFAULT_MS)
#define DISPLAY_LEVEL_DEFAULT 3
#define BENCH_SIZE_MAX (1200 MB)
/*--- Macros ---*/
#define CONTROL(c) { if (!(c)) abort(); }
#undef MIN
#define MIN(a,b) ((a) < (b) ? (a) : (b))
/*--- Display Macros ---*/
#define DISPLAY(...) fprintf(stdout, __VA_ARGS__)
#define DISPLAYLEVEL(l, ...) { if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); } }
static int g_displayLevel = DISPLAY_LEVEL_DEFAULT; /* 0 : no display, 1: errors, 2 : + result + interaction + warnings, 3 : + progression, 4 : + information */
/*--- buffer_t ---*/
typedef struct {
void* ptr;
size_t size;
size_t capacity;
} buffer_t;
static const buffer_t kBuffNull = { NULL, 0, 0 };
/* createBuffer() :
 * allocates `capacity` bytes (must be > 0); the new buffer has size 0.
 * @return : kBuffNull if allocation fails */
static buffer_t createBuffer(size_t capacity)
{
    assert(capacity > 0);
    buffer_t result = kBuffNull;
    void* const mem = malloc(capacity);
    if (mem != NULL) {
        result.ptr = mem;
        result.size = 0;
        result.capacity = capacity;
    }
    return result;
}
/* freeBuffer() :
 * releases the memory referenced by `buff`. */
static void freeBuffer(buffer_t buff)
{
    void* const mem = buff.ptr;
    free(mem);
}
/* fillBuffer_fromHandle() :
 * reads up to buff->capacity bytes from `f` into the buffer,
 * and records the nb of bytes actually read into buff->size. */
static void fillBuffer_fromHandle(buffer_t* buff, FILE* f)
{
    buff->size = fread(buff->ptr, 1, buff->capacity, f);
}
/* createBuffer_fromFile() :
 * loads the whole content of `fileName` into a newly allocated buffer.
 * @return : kBuffNull if the file size is unknown or the file cannot be opened.
 * note : aborts (CONTROL) if allocation fails or the file is not read entirely. */
static buffer_t createBuffer_fromFile(const char* fileName)
{
    U64 const fileSize = UTIL_getFileSize(fileName);
    if (fileSize == UTIL_FILESIZE_UNKNOWN) return kBuffNull;

    size_t const bufferSize = (size_t) fileSize;
    assert((U64)bufferSize == fileSize);   /* check overflow */

    FILE* const f = fopen(fileName, "rb");
    if (f == NULL) return kBuffNull;

    buffer_t loaded = createBuffer(bufferSize);
    CONTROL(loaded.ptr != NULL);

    fillBuffer_fromHandle(&loaded, f);
    CONTROL(loaded.size == loaded.capacity);

    fclose(f);   /* do nothing specific if fclose() fails */
    return loaded;
}
/* createDictionaryBuffer() :
 * if `dictionaryName` is provided, the dictionary is loaded from that file;
 * otherwise, a dictionary of at most `requestedDictSize` bytes is trained
 * from the `nbBlocks` samples stored back to back in `srcBuffer`.
 * @return : the dictionary, or kBuffNull if the file could not be loaded.
 * note : aborts (CONTROL) if allocation or training fails. */
static buffer_t
createDictionaryBuffer(const char* dictionaryName,
                       const void* srcBuffer,
                       const size_t* srcBlockSizes, size_t nbBlocks,
                       size_t requestedDictSize)
{
    if (dictionaryName != NULL) {
        DISPLAYLEVEL(3, "loading dictionary %s \n", dictionaryName);
        return createBuffer_fromFile(dictionaryName);  /* note : result might be kBuffNull */
    }

    DISPLAYLEVEL(3, "creating dictionary, of target size %u bytes \n",
                    (unsigned)requestedDictSize);
    void* const trained = malloc(requestedDictSize);
    CONTROL(trained != NULL);

    assert(nbBlocks <= UINT_MAX);
    size_t const trainedSize = ZDICT_trainFromBuffer(trained, requestedDictSize,
                                                     srcBuffer,
                                                     srcBlockSizes, (unsigned)nbBlocks);
    CONTROL(!ZSTD_isError(trainedSize));

    buffer_t dict;
    dict.ptr = trained;
    dict.size = trainedSize;
    dict.capacity = requestedDictSize;
    return dict;
}
/* createCDictForDedicatedDictSearch() :
 * creates a CDict from `dict` at `compressionLevel`,
 * with parameter ZSTD_c_enableDedicatedDictSearch set to 1.
 * The dictionary content is copied (ZSTD_dlm_byCopy).
 * @return : the CDict, as returned by ZSTD_createCDict_advanced2().
 * note : aborts (CONTROL) if the parameter object cannot be allocated. */
static ZSTD_CDict* createCDictForDedicatedDictSearch(const void* dict, size_t dictSize, int compressionLevel)
{
    ZSTD_CCtx_params* const params = ZSTD_createCCtxParams();
    CONTROL(params != NULL);   /* do not hand a NULL parameter object to the setters */
    ZSTD_CCtxParams_init(params, compressionLevel);
    ZSTD_CCtxParams_setParameter(params, ZSTD_c_enableDedicatedDictSearch, 1);
    ZSTD_CCtxParams_setParameter(params, ZSTD_c_compressionLevel, compressionLevel);

    ZSTD_CDict* const cdict = ZSTD_createCDict_advanced2(dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto, params, ZSTD_defaultCMem);

    ZSTD_freeCCtxParams(params);
    return cdict;
}
/*! loadFiles() :
 *  Loads `buffer` with the content of the files listed in `fileNamesTable`,
 *  stored back to back, and records the size of each file into `fileSizes`.
 *  Directories and files of unknown size are skipped (size recorded as 0).
 *  `buffer` is expected to be filled entirely (checked with assert()).
 *  note : aborts (CONTROL) if a file cannot be opened or read completely.
 * @return : 0 */
static int loadFiles(void* buffer, size_t bufferSize,
                     size_t* fileSizes,
                     const char* const * fileNamesTable, unsigned nbFiles)
{
    size_t pos = 0, totalSize = 0;

    for (unsigned n=0; n<nbFiles; n++) {
        U64 fileSize = UTIL_getFileSize(fileNamesTable[n]);
        if (UTIL_isDirectory(fileNamesTable[n])) {
            fileSizes[n] = 0;
            continue;
        }
        if (fileSize == UTIL_FILESIZE_UNKNOWN) {
            fileSizes[n] = 0;
            continue;
        }

        FILE* const f = fopen(fileNamesTable[n], "rb");
        /* runtime checks : must remain active when assert() is compiled out */
        CONTROL(f != NULL);

        CONTROL(pos <= bufferSize);
        CONTROL(fileSize <= bufferSize - pos);

        {   size_t const readSize = fread(((char*)buffer)+pos, 1, (size_t)fileSize, f);
            CONTROL(readSize == fileSize);
            pos += readSize;
        }
        fileSizes[n] = (size_t)fileSize;
        totalSize += (size_t)fileSize;
        fclose(f);
    }

    assert(totalSize == bufferSize);
    (void)totalSize;   /* only read by assert() */
    return 0;
}
/*--- slice_collection_t ---*/
typedef struct {
void** slicePtrs;
size_t* capacities;
size_t nbSlices;
} slice_collection_t;
static const slice_collection_t kNullCollection = { NULL, NULL, 0 };
/* freeSliceCollection() :
 * releases the two tables of `collection`;
 * the memory the slices point into is not released. */
static void freeSliceCollection(slice_collection_t collection)
{
    free(collection.capacities);
    free(collection.slicePtrs);
}
/* shrinkSizes() :
 * overwrites the capacity of each slice of `collection` with the matching `newSizes` entry.
 * `newSizes` must hold collection.nbSlices entries,
 * each one <= the capacity it replaces (checked with assert()). */
void shrinkSizes(slice_collection_t collection,
                 const size_t* newSizes)  /* presumed same size as collection */
{
    size_t* const capacities = collection.capacities;
    size_t n = 0;
    while (n < collection.nbSlices) {
        assert(newSizes[n] <= capacities[n]);
        capacities[n] = newSizes[n];
        n++;
    }
}
/* splitSlices() :
 * cuts the slices of `srcSlices` into blocks of at most `blockSize` bytes.
 * The resulting slices point into the memory of `srcSlices` (nothing is copied).
 * blockSize : if == 0, source slices are not cut.
 * nbSlices : if == 0, nbSlices is automatically determined from srcSlices and blockSize.
 *            otherwise, creates exactly nbSlices slices,
 *            by either truncating input (when smaller)
 *            or repeating input from beginning
 * @return : kNullCollection if there is no data to split, or if allocation fails */
static slice_collection_t
splitSlices(slice_collection_t srcSlices, size_t blockSize, size_t nbSlices)
{
    if (blockSize==0) blockSize = (size_t)(-1);   /* means "do not cut" */

    /* count blocks the same way they are produced below :
     * ceil(capacity / blockSize), written so that it cannot overflow */
    size_t nbSrcBlocks = 0;
    for (size_t ssnb=0; ssnb < srcSlices.nbSlices; ssnb++) {
        size_t const capacity = srcSlices.capacities[ssnb];
        nbSrcBlocks += capacity / blockSize + (capacity % blockSize != 0);
    }

    /* no source slice, or only empty ones :
     * the filling loop below would divide by zero or never terminate */
    if (nbSrcBlocks == 0) return kNullCollection;

    if (nbSlices == 0) nbSlices = nbSrcBlocks;

    void** const sliceTable = (void**)malloc(nbSlices * sizeof(*sliceTable));
    size_t* const capacities = (size_t*)malloc(nbSlices * sizeof(*capacities));
    if (sliceTable == NULL || capacities == NULL) {
        free(sliceTable);
        free(capacities);
        return kNullCollection;
    }

    size_t ssnb = 0;
    for (size_t sliceNb=0; sliceNb < nbSlices; ) {
        /* go to next source slice, wrapping around to repeat input when needed */
        ssnb = (ssnb + 1) % srcSlices.nbSlices;
        size_t pos = 0;
        char* const ptr = (char*)srcSlices.slicePtrs[ssnb];
        while (pos < srcSlices.capacities[ssnb] && sliceNb < nbSlices) {
            size_t const size = MIN(blockSize, srcSlices.capacities[ssnb] - pos);
            sliceTable[sliceNb] = ptr + pos;
            capacities[sliceNb] = size;
            sliceNb++;
            pos += size;   /* advancing by `size` cannot overflow, unlike `blockSize` */
        }
    }

    slice_collection_t result;
    result.nbSlices = nbSlices;
    result.slicePtrs = sliceTable;
    result.capacities = capacities;
    return result;
}
/* sliceCollection_totalCapacity() :
 * @return : sum of the capacities of all slices of `sc` */
static size_t sliceCollection_totalCapacity(slice_collection_t sc)
{
    size_t sum = 0;
    size_t remaining = sc.nbSlices;
    const size_t* capacity = sc.capacities;
    while (remaining-- > 0) {
        sum += *capacity++;
    }
    return sum;
}
/* --- buffer collection --- */
typedef struct {
buffer_t buffer;
slice_collection_t slices;
} buffer_collection_t;
/* freeBufferCollection() :
 * releases both the buffer and the slice tables of `bc`. */
static void freeBufferCollection(buffer_collection_t bc)
{
    freeSliceCollection(bc.slices);
    freeBuffer(bc.buffer);
}
/* createBufferCollection_fromSliceCollectionSizes() :
 * allocates one buffer large enough for all the slices of `sc`,
 * and creates slices of the same capacities, laid out back to back in it.
 * Content of `sc` is not copied.
 * note : aborts (CONTROL) if any allocation fails. */
static buffer_collection_t
createBufferCollection_fromSliceCollectionSizes(slice_collection_t sc)
{
    buffer_t buffer = createBuffer(sliceCollection_totalCapacity(sc));
    CONTROL(buffer.ptr != NULL);

    size_t const count = sc.nbSlices;
    void** const slicePtrs = (void**)malloc(count * sizeof(*slicePtrs));
    CONTROL(slicePtrs != NULL);
    size_t* const sizes = (size_t*)malloc(count * sizeof(*sizes));
    CONTROL(sizes != NULL);

    char* cursor = (char*)buffer.ptr;
    for (size_t n = 0; n < count; n++) {
        slicePtrs[n] = cursor;
        sizes[n] = sc.capacities[n];
        cursor += sizes[n];
    }

    buffer_collection_t result;
    result.buffer = buffer;
    result.slices.slicePtrs = slicePtrs;
    result.slices.capacities = sizes;
    result.slices.nbSlices = count;
    return result;
}
/* createBufferCollection_fromSliceCollection() :
 * allocates one buffer large enough for all the slices of `sc`,
 * and copies each slice of `sc` into it, back to back.
 * The resulting slices have the same capacities as the ones of `sc`.
 * note : aborts (CONTROL) if any allocation fails. */
static buffer_collection_t
createBufferCollection_fromSliceCollection(slice_collection_t sc)
{
    buffer_t buffer = createBuffer(sliceCollection_totalCapacity(sc));
    CONTROL(buffer.ptr != NULL);

    size_t const count = sc.nbSlices;
    void** const slicePtrs = (void**)malloc(count * sizeof(*slicePtrs));
    CONTROL(slicePtrs != NULL);
    size_t* const sizes = (size_t*)malloc(count * sizeof(*sizes));
    CONTROL(sizes != NULL);

    char* cursor = (char*)buffer.ptr;
    for (size_t n = 0; n < count; n++) {
        sizes[n] = sc.capacities[n];
        slicePtrs[n] = cursor;
        memcpy(cursor, sc.slicePtrs[n], sizes[n]);
        cursor += sizes[n];
    }

    buffer_collection_t result;
    result.buffer = buffer;
    result.slices.slicePtrs = slicePtrs;
    result.slices.capacities = sizes;
    result.slices.nbSlices = count;
    return result;
}
/* createBufferCollection_fromFiles() :
 * loads all files of `fileNamesTable` back to back into a single buffer,
 * and creates one slice per file (possibly of size 0).
 * note : aborts (CONTROL) on any error :
 *        unknown or excessive total size, nothing to load, allocation or loading failure. */
static buffer_collection_t
createBufferCollection_fromFiles(const char* const * fileNamesTable, unsigned nbFiles)
{
    U64 const totalSizeToLoad = UTIL_getTotalFileSize(fileNamesTable, nbFiles);
    /* runtime checks : must remain active when assert() is compiled out */
    CONTROL(totalSizeToLoad != UTIL_FILESIZE_UNKNOWN);
    CONTROL(totalSizeToLoad <= BENCH_SIZE_MAX);
    size_t const loadedSize = (size_t)totalSizeToLoad;
    CONTROL(loadedSize > 0);
    void* const srcBuffer = malloc(loadedSize);
    CONTROL(srcBuffer != NULL);

    CONTROL(nbFiles > 0);
    size_t* const fileSizes = (size_t*)calloc(nbFiles, sizeof(*fileSizes));
    CONTROL(fileSizes != NULL);

    /* Load input buffer */
    int const errorCode = loadFiles(srcBuffer, loadedSize,
                                    fileSizes,
                                    fileNamesTable, nbFiles);
    CONTROL(errorCode == 0);

    void** sliceTable = (void**)malloc(nbFiles * sizeof(*sliceTable));
    CONTROL(sliceTable != NULL);

    /* every file gets a slice, including empty ones at the end of the list :
     * no entry of sliceTable is left uninitialized */
    char* const ptr = (char*)srcBuffer;
    size_t pos = 0;
    for (unsigned fileNb = 0; fileNb < nbFiles; fileNb++) {
        sliceTable[fileNb] = ptr + pos;
        pos += fileSizes[fileNb];
    }
    CONTROL(pos == loadedSize);

    buffer_t buffer;
    buffer.ptr = srcBuffer;
    buffer.capacity = loadedSize;
    buffer.size = loadedSize;

    slice_collection_t slices;
    slices.slicePtrs = sliceTable;
    slices.capacities = fileSizes;
    slices.nbSlices = nbFiles;

    buffer_collection_t bc;
    bc.buffer = buffer;
    bc.slices = slices;
    return bc;
}
/*--- ddict_collection_t ---*/
/* Table of `nbDDict` decompression dictionaries (ZSTD_DDict*). */
typedef struct {
ZSTD_DDict** ddicts;
size_t nbDDict;
} ddict_collection_t;
/* Table of `nbCDict` compression dictionaries (ZSTD_CDict*). */
typedef struct {
ZSTD_CDict** cdicts;
size_t nbCDict;
} cdict_collection_t;
/* value representing an empty / failed cdict collection */
static const cdict_collection_t kNullCDictCollection = { NULL, 0 };

/* freeCDictCollection() :
 * Releases every compression dictionary referenced by `cdictc`,
 * then the table of pointers itself. */
static void freeCDictCollection(cdict_collection_t cdictc)
{
    ZSTD_CDict** const table = cdictc.cdicts;
    size_t const count = cdictc.nbCDict;
    size_t idx = 0;
    while (idx < count) {
        ZSTD_freeCDict(table[idx]);
        idx++;
    }
    free(table);
}
/* createCDictCollection() :
 * Creates `nbCDict` compression dictionaries, all built from the same
 * `dictBuffer` at level `cLevel`. When `dedicatedDictSearch` is non-zero,
 * each one is built with createCDictForDedicatedDictSearch() instead of
 * ZSTD_createCDict().
 * @return : the new collection, or kNullCDictCollection (.cdicts == NULL)
 *           if the pointer table cannot be allocated.
 * Creation of each individual dictionary is checked with CONTROL(). */
static cdict_collection_t createCDictCollection(const void* dictBuffer, size_t dictSize, size_t nbCDict, int cLevel, int dedicatedDictSearch)
{
    ZSTD_CDict** const table = malloc(nbCDict * sizeof(ZSTD_CDict*));
    if (table == NULL) return kNullCDictCollection;

    for (size_t n = 0; n < nbCDict; n++) {
        if (dedicatedDictSearch) {
            table[n] = createCDictForDedicatedDictSearch(dictBuffer, dictSize, cLevel);
        } else {
            table[n] = ZSTD_createCDict(dictBuffer, dictSize, cLevel);
        }
        CONTROL(table[n] != NULL);
    }

    cdict_collection_t collection;
    collection.nbCDict = nbCDict;
    collection.cdicts = table;
    return collection;
}
/* value representing an empty / failed ddict collection */
static const ddict_collection_t kNullDDictCollection = { NULL, 0 };

/* freeDDictCollection() :
 * Releases every decompression dictionary referenced by `ddictc`,
 * then the table of pointers itself. */
static void freeDDictCollection(ddict_collection_t ddictc)
{
    ZSTD_DDict** const table = ddictc.ddicts;
    size_t const count = ddictc.nbDDict;
    size_t idx = 0;
    while (idx < count) {
        ZSTD_freeDDict(table[idx]);
        idx++;
    }
    free(table);
}
/* createDDictCollection() :
 * Creates `nbDDict` decompression dictionaries, all built from the same
 * `dictBuffer`.
 * @return : the new collection, or kNullDDictCollection (.ddicts == NULL)
 *           if the pointer table cannot be allocated.
 * Creation of each individual dictionary is checked with CONTROL(),
 * mirroring createCDictCollection(), so a failed creation is never
 * silently stored as a NULL entry when assert() is compiled out. */
static ddict_collection_t createDDictCollection(const void* dictBuffer, size_t dictSize, size_t nbDDict)
{
    ZSTD_DDict** const ddicts = malloc(nbDDict * sizeof(ZSTD_DDict*));
    if (ddicts==NULL) return kNullDDictCollection;
    for (size_t dictNb=0; dictNb < nbDDict; dictNb++) {
        ddicts[dictNb] = ZSTD_createDDict(dictBuffer, dictSize);
        CONTROL(ddicts[dictNb] != NULL);
    }
    ddict_collection_t ddictc;
    ddictc.ddicts = ddicts;
    ddictc.nbDDict = nbDDict;
    return ddictc;
}
/* shuffleCDictionaries() :
 * Randomly permutes the pointers stored in the collection (using rand()),
 * so that walking the table in index order no longer walks the
 * dictionaries in address order. Only the table entries are swapped. */
void shuffleCDictionaries(cdict_collection_t dicts)
{
    ZSTD_CDict** const table = dicts.cdicts;
    size_t const count = dicts.nbCDict;
    size_t pass;

    /* first pass : swap every slot, in order, with a randomly picked slot */
    for (pass = 0; pass < count; pass++) {
        size_t const pick = (size_t)rand() % count;
        ZSTD_CDict* const held = table[pick];
        table[pick] = table[pass];
        table[pass] = held;
    }

    /* second pass : swap `count` randomly picked pairs of slots */
    for (pass = 0; pass < count; pass++) {
        size_t const first = (size_t)rand() % count;
        size_t const second = (size_t)rand() % count;
        ZSTD_CDict* const held = table[first];
        table[first] = table[second];
        table[second] = held;
    }
}
/* shuffleDDictionaries() :
 * Randomly permutes the pointers stored in the collection (using rand()),
 * so that walking the table in index order no longer walks the
 * dictionaries in address order. Only the table entries are swapped. */
void shuffleDDictionaries(ddict_collection_t dicts)
{
    ZSTD_DDict** const table = dicts.ddicts;
    size_t const count = dicts.nbDDict;
    size_t pass;

    /* first pass : swap every slot, in order, with a randomly picked slot */
    for (pass = 0; pass < count; pass++) {
        size_t const pick = (size_t)rand() % count;
        ZSTD_DDict* const held = table[pick];
        table[pick] = table[pass];
        table[pass] = held;
    }

    /* second pass : swap `count` randomly picked pairs of slots */
    for (pass = 0; pass < count; pass++) {
        size_t const first = (size_t)rand() % count;
        size_t const second = (size_t)rand() % count;
        ZSTD_DDict* const held = table[first];
        table[first] = table[second];
        table[second] = held;
    }
}
/* --- Compression --- */
/* compressBlocks() :
 * Compresses every slice of `srcBlockBuffers` into the slice of same index
 * in `dstBlockBuffers`, using `cdict` when provided, or plain compression
 * at level `cLevel` when `cdict` is NULL.
 * `cSizes` is optional (can be NULL). If present, it must contain at least
 * nbBlocks fields, and receives the compressed size of each block.
 * Context creation and every compression result are checked with CONTROL().
 * @return : total compressed size of all blocks
 *           (0 when nothing was compressed).
 */
static size_t compressBlocks(size_t* cSizes,
                             slice_collection_t dstBlockBuffers,
                             slice_collection_t srcBlockBuffers,
                             ZSTD_CDict* cdict, int cLevel)
{
    size_t const nbBlocks = srcBlockBuffers.nbSlices;
    assert(dstBlockBuffers.nbSlices == srcBlockBuffers.nbSlices);

    ZSTD_CCtx* const cctx = ZSTD_createCCtx();
    CONTROL(cctx != NULL);

    size_t totalCSize = 0;
    for (size_t blockNb=0; blockNb < nbBlocks; blockNb++) {
        size_t cBlockSize;
        if (cdict == NULL) {
            cBlockSize = ZSTD_compressCCtx(cctx,
                            dstBlockBuffers.slicePtrs[blockNb], dstBlockBuffers.capacities[blockNb],
                            srcBlockBuffers.slicePtrs[blockNb], srcBlockBuffers.capacities[blockNb],
                            cLevel);
        } else {
            cBlockSize = ZSTD_compress_usingCDict(cctx,
                            dstBlockBuffers.slicePtrs[blockNb], dstBlockBuffers.capacities[blockNb],
                            srcBlockBuffers.slicePtrs[blockNb], srcBlockBuffers.capacities[blockNb],
                            cdict);
        }
        CONTROL(!ZSTD_isError(cBlockSize));
        if (cSizes) cSizes[blockNb] = cBlockSize;
        totalCSize += cBlockSize;
    }

    /* the context is private to this call : release it before returning */
    ZSTD_freeCCtx(cctx);
    return totalCSize;
}
/* --- Benchmark --- */
/* State handed to the benched compress() function through its payload. */
typedef struct {
ZSTD_CCtx* cctx; /* compression context, reused for every call */
size_t nbDicts; /* number of entries in `dictionaries` */
size_t dictNb; /* index of the dictionary to use for the next call */
cdict_collection_t dictionaries;
} compressInstructions;
/* createCompressInstructions() :
 * Prepares the payload used by compress() : a new compression context
 * (checked with CONTROL()) and a cursor starting at the first dictionary
 * of `dictionaries`. The collection itself is referenced, not copied. */
compressInstructions createCompressInstructions(cdict_collection_t dictionaries)
{
    ZSTD_CCtx* const cctx = ZSTD_createCCtx();
    CONTROL(cctx != NULL);

    compressInstructions ci;
    ci.dictionaries = dictionaries;
    ci.nbDicts = dictionaries.nbCDict;
    ci.dictNb = 0;
    ci.cctx = cctx;
    return ci;
}
/* freeCompressInstructions() :
 * Releases the compression context held by `ci`.
 * The dictionary collection is left untouched. */
void freeCompressInstructions(compressInstructions ci)
{
    ZSTD_CCtx* const cctx = ci.cctx;
    ZSTD_freeCCtx(cctx);
}
/* State handed to the benched decompress() function through its payload. */
typedef struct {
ZSTD_DCtx* dctx; /* decompression context, reused for every call */
size_t nbDicts; /* number of entries in `dictionaries` */
size_t dictNb; /* index of the dictionary to use for the next call */
ddict_collection_t dictionaries;
} decompressInstructions;
/* createDecompressInstructions() :
 * Prepares the payload used by decompress() : a new decompression context
 * and a cursor starting at the first dictionary of `dictionaries`.
 * The collection itself is referenced, not copied.
 * Context creation is checked with CONTROL(), like its compression
 * counterpart, so a failed creation is caught even when assert() is
 * compiled out. */
decompressInstructions createDecompressInstructions(ddict_collection_t dictionaries)
{
    decompressInstructions di;
    di.dctx = ZSTD_createDCtx();
    CONTROL(di.dctx != NULL);
    di.nbDicts = dictionaries.nbDDict;
    di.dictNb = 0;
    di.dictionaries = dictionaries;
    return di;
}
/* freeDecompressInstructions() :
 * Releases the decompression context held by `di`.
 * The dictionary collection is left untouched. */
void freeDecompressInstructions(decompressInstructions di)
{
    ZSTD_DCtx* const dctx = di.dctx;
    ZSTD_freeDCtx(dctx);
}
/* benched function */
size_t compress(const void* src, size_t srcSize, void* dst, size_t dstCapacity, void* payload)
{
compressInstructions* const ci = (compressInstructions*) payload;
(void)dstCapacity;
ZSTD_compress_usingCDict(ci->cctx,
dst, srcSize,
src, srcSize,
ci->dictionaries.cdicts[ci->dictNb]);
ci->dictNb = ci->dictNb + 1;
if (ci->dictNb >= ci->nbDicts) ci->dictNb = 0;
return srcSize;
}
/* benched function */
size_t decompress(const void* src, size_t srcSize, void* dst, size_t dstCapacity, void* payload)
{
decompressInstructions* const di = (decompressInstructions*) payload;
size_t const result = ZSTD_decompress_usingDDict(di->dctx,
dst, dstCapacity,
src, srcSize,
di->dictionaries.ddicts[di->dictNb]);
di->dictNb = di->dictNb + 1;
if (di->dictNb >= di->nbDicts) di->dictNb = 0;
return result;
}
/* benchMem() :
 * Runs the timed benchmark : compress() when `benchCompression` is non-zero,
 * decompress() otherwise, over all blocks of `srcBlocks` / `dstBlocks`
 * (which must hold the same number of slices).
 * The run is repeated until the timed state reports completion; the best
 * speed observed so far is displayed after each run.
 * Both a compression and a decompression payload are created, whichever
 * mode is selected, and both are released at the end.
 * @return : 0 (success). A failed run is caught by CONTROL(). */
static int benchMem(slice_collection_t dstBlocks,
                    slice_collection_t srcBlocks,
                    ddict_collection_t ddictionaries,
                    cdict_collection_t cdictionaries,
                    unsigned nbRounds, int benchCompression)
{
    assert(dstBlocks.nbSlices == srcBlocks.nbSlices);

    unsigned const roundDuration_ms = RUN_TIME_DEFAULT_MS;
    unsigned const totalDuration_ms = nbRounds * roundDuration_ms;
    double fastest_MBps = 0.;

    BMK_timedFnState_t* const timer =
        BMK_createTimedFnState(totalDuration_ms, roundDuration_ms);

    decompressInstructions di = createDecompressInstructions(ddictionaries);
    compressInstructions ci = createCompressInstructions(cdictionaries);

    BMK_benchParams_t const params = {
        .benchFn = benchCompression ? compress : decompress,
        .benchPayload = benchCompression ? (void*)&ci : (void*)&di,
        .initFn = NULL,
        .initPayload = NULL,
        .errorFn = ZSTD_isError,
        .blockCount = dstBlocks.nbSlices,
        .srcBuffers = (const void* const*) srcBlocks.slicePtrs,
        .srcSizes = srcBlocks.capacities,
        .dstBuffers = dstBlocks.slicePtrs,
        .dstCapacities = dstBlocks.capacities,
        .blockResults = NULL
    };

    do {
        BMK_runOutcome_t const outcome = BMK_benchTimedFn(timer, params);
        CONTROL(BMK_isSuccessful_runOutcome(outcome));

        BMK_runTime_t const runTime = BMK_extract_runTime(outcome);
        double const elapsed_ns = runTime.nanoSecPerRun;
        double const elapsed_sec = (double)elapsed_ns / 1000000000;
        size_t const processed = runTime.sumOfReturn;
        double const current_MBps = (double)processed / elapsed_sec / (1 MB);

        /* only the best speed seen so far is reported */
        if (current_MBps > fastest_MBps) fastest_MBps = current_MBps;

        if (benchCompression) {
            DISPLAY("Compression Speed : %.1f MB/s \r", fastest_MBps);
        } else {
            DISPLAY("Decompression Speed : %.1f MB/s \r", fastest_MBps);
        }
        fflush(stdout);
    } while (!BMK_isCompleted_TimedFn(timer));

    DISPLAY("\n");

    freeDecompressInstructions(di);
    freeCompressInstructions(ci);
    BMK_freeTimedFnState(timer);
    return 0; /* success */
}
/*! bench() :
 * fileNameTable : `nbFiles` files to load for benchmarking purpose
 * dictionary : optional (can be NULL), file to load as dictionary,
 *              if none provided : will be calculated on the fly by the program.
 * blockSize, nbBlocks : control how the loaded files are split into blocks
 * clevel : compression level used for all compressions and dictionaries
 * nbDictMax : number of dictionaries to create (0 == one per block)
 * nbRounds : number of timed rounds
 * benchCompression : non-zero benches compression, zero benches decompression
 * dedicatedDictSearch : non-zero builds CDicts with createCDictForDedicatedDictSearch()
 * @return : 0 is success, 1+ otherwise */
int bench(const char** fileNameTable, unsigned nbFiles,
          const char* dictionary,
          size_t blockSize, int clevel,
          unsigned nbDictMax, unsigned nbBlocks,
          unsigned nbRounds, int benchCompression,
          int dedicatedDictSearch)
{
    int result = 0;

    DISPLAYLEVEL(3, "loading %u files... \n", nbFiles);
    buffer_collection_t const srcs = createBufferCollection_fromFiles(fileNameTable, nbFiles);
    CONTROL(srcs.buffer.ptr != NULL);
    buffer_t srcBuffer = srcs.buffer;
    size_t const srcSize = srcBuffer.size;
    DISPLAYLEVEL(3, "created src buffer of size %.1f MB \n",
                    (double)srcSize / (1 MB));

    slice_collection_t const srcSlices = splitSlices(srcs.slices, blockSize, nbBlocks);
    nbBlocks = (unsigned)(srcSlices.nbSlices);
    DISPLAYLEVEL(3, "split input into %u blocks ", nbBlocks);
    if (blockSize)
        DISPLAYLEVEL(3, "of max size %u bytes ", (unsigned)blockSize);
    DISPLAYLEVEL(3, "\n");
    size_t const totalSrcSlicesSize = sliceCollection_totalCapacity(srcSlices);

    /* destination : one slice per block, each sized for the worst case */
    size_t* const dstCapacities = malloc(nbBlocks * sizeof(*dstCapacities));
    CONTROL(dstCapacities != NULL);
    size_t dstBufferCapacity = 0;
    for (size_t bnb=0; bnb<nbBlocks; bnb++) {
        dstCapacities[bnb] = ZSTD_compressBound(srcSlices.capacities[bnb]);
        dstBufferCapacity += dstCapacities[bnb];
    }

    buffer_t dstBuffer = createBuffer(dstBufferCapacity);
    CONTROL(dstBuffer.ptr != NULL);

    void** const sliceTable = malloc(nbBlocks * sizeof(*sliceTable));
    CONTROL(sliceTable != NULL);

    {   char* const ptr = dstBuffer.ptr;
        size_t pos = 0;
        for (size_t snb=0; snb < nbBlocks; snb++) {
            sliceTable[snb] = ptr + pos;
            pos += dstCapacities[snb];
    }   }

    slice_collection_t dstSlices;
    dstSlices.capacities = dstCapacities;
    dstSlices.slicePtrs = sliceTable;
    dstSlices.nbSlices = nbBlocks;

    /* dictionary determination */
    buffer_t const dictBuffer = createDictionaryBuffer(dictionary,
                                srcs.buffer.ptr,
                                srcs.slices.capacities, srcs.slices.nbSlices,
                                DICTSIZE);
    CONTROL(dictBuffer.ptr != NULL);

    ZSTD_CDict* const cdict = dedicatedDictSearch ?
        createCDictForDedicatedDictSearch(dictBuffer.ptr, dictBuffer.size, clevel) :
        ZSTD_createCDict(dictBuffer.ptr, dictBuffer.size, clevel);
    CONTROL(cdict != NULL);

    /* reference compression, without then with dictionary, to report ratios */
    size_t const cTotalSizeNoDict = compressBlocks(NULL, dstSlices, srcSlices, NULL, clevel);
    CONTROL(cTotalSizeNoDict != 0);
    DISPLAYLEVEL(3, "compressing at level %u without dictionary : Ratio=%.2f (%u bytes) \n",
                    clevel,
                    (double)totalSrcSlicesSize / cTotalSizeNoDict, (unsigned)cTotalSizeNoDict);

    size_t* const cSizes = malloc(nbBlocks * sizeof(size_t));
    CONTROL(cSizes != NULL);

    size_t const cTotalSize = compressBlocks(cSizes, dstSlices, srcSlices, cdict, clevel);
    CONTROL(cTotalSize != 0);
    DISPLAYLEVEL(3, "compressed using a %u bytes dictionary : Ratio=%.2f (%u bytes) \n",
                    (unsigned)dictBuffer.size,
                    (double)totalSrcSlicesSize / cTotalSize, (unsigned)cTotalSize);

    /* now dstSlices contain the real compressed size of each block, instead of the maximum capacity */
    shrinkSizes(dstSlices, cSizes);

    unsigned const nbDicts = nbDictMax ? nbDictMax : nbBlocks;

    cdict_collection_t const cdictionaries = createCDictCollection(dictBuffer.ptr, dictBuffer.size, nbDicts, clevel, dedicatedDictSearch);
    CONTROL(cdictionaries.cdicts != NULL);

    ddict_collection_t const ddictionaries = createDDictCollection(dictBuffer.ptr, dictBuffer.size, nbDicts);
    CONTROL(ddictionaries.ddicts != NULL);

    if (benchCompression) {
        /* ZSTD_estimateCDictSize() takes a compression level as second
         * argument (not a dictionary load method) */
        size_t const dictMem = ZSTD_estimateCDictSize(dictBuffer.size, clevel);
        size_t const allDictMem = dictMem * nbDicts;
        DISPLAYLEVEL(3, "generating %u dictionaries, using %.1f MB of memory \n",
                        nbDicts, (double)allDictMem / (1 MB));

        shuffleCDictionaries(cdictionaries);

        /* compression reads from a private copy of the source blocks */
        buffer_collection_t resultCollection = createBufferCollection_fromSliceCollection(srcSlices);
        CONTROL(resultCollection.buffer.ptr != NULL);

        result = benchMem(dstSlices, resultCollection.slices, ddictionaries, cdictionaries, nbRounds, benchCompression);

        freeBufferCollection(resultCollection);
    } else {
        size_t const dictMem = ZSTD_estimateDDictSize(dictBuffer.size, ZSTD_dlm_byCopy);
        size_t const allDictMem = dictMem * nbDicts;
        DISPLAYLEVEL(3, "generating %u dictionaries, using %.1f MB of memory \n",
                        nbDicts, (double)allDictMem / (1 MB));

        shuffleDDictionaries(ddictionaries);

        /* decompression writes into buffers sized like the source blocks */
        buffer_collection_t resultCollection = createBufferCollection_fromSliceCollectionSizes(srcSlices);
        CONTROL(resultCollection.buffer.ptr != NULL);

        result = benchMem(resultCollection.slices, dstSlices, ddictionaries, cdictionaries, nbRounds, benchCompression);

        freeBufferCollection(resultCollection);
    }

    /* free all heap objects in reverse order */
    freeCDictCollection(cdictionaries);
    freeDDictCollection(ddictionaries);
    free(cSizes);
    ZSTD_freeCDict(cdict);
    freeBuffer(dictBuffer);
    freeSliceCollection(dstSlices);
    freeBuffer(dstBuffer);
    freeSliceCollection(srcSlices);
    freeBufferCollection(srcs);

    return result;
}
/* --- Command Line --- */
/*! readU32FromChar() :
 * Parses a run of decimal digits at `*stringPtr`, optionally followed by a
 * size suffix : K, KB, KiB (x1024) or M, MB, MiB (x1024x1024).
 * `*stringPtr` is advanced to the first character that was not consumed.
 * @return : the parsed value (0 if there is no digit).
 * Note : overflow of the digit sequence or of the suffix multiplication is
 *        detected with assert(). */
static unsigned readU32FromChar(const char** stringPtr)
{
    unsigned const maxBeforeDigit = (((unsigned)(-1)) / 10) - 1;
    unsigned const maxBeforeShift = ((unsigned)(-1)) >> 10;
    const char* cursor = *stringPtr;
    unsigned value = 0;

    /* decimal digits */
    for ( ; (*cursor >= '0') && (*cursor <= '9'); cursor++) {
        assert(value <= maxBeforeDigit);   /* check overflow */
        value = value * 10 + ((unsigned)*cursor - '0');
    }

    /* optional multiplier suffix */
    {   char const unit = *cursor;
        if ((unit == 'K') || (unit == 'M')) {
            assert(value <= maxBeforeShift);   /* check overflow */
            value <<= 10;
            if (unit == 'M') {
                assert(value <= maxBeforeShift);   /* check overflow */
                value <<= 10;
            }
            cursor++;   /* skip `K` or `M` */
            if (*cursor == 'i') cursor++;
            if (*cursor == 'B') cursor++;
    }   }

    *stringPtr = cursor;
    return value;
}
/** longCommandWArg() :
 *  Tests whether the string at *stringPtr starts with `longCommand`.
 *  On a match, advances *stringPtr just past `longCommand` and
 *  @return 1.
 *  Otherwise leaves *stringPtr unchanged and @return 0.
 */
static int longCommandWArg(const char** stringPtr, const char* longCommand)
{
    size_t const prefixLen = strlen(longCommand);
    if (strncmp(*stringPtr, longCommand, prefixLen) != 0) return 0;
    *stringPtr += prefixLen;
    return 1;
}
/* usage() :
 * Displays the command line help, including the long-form options
 * accepted by main().
 * @return : 0, so that main() can return it directly. */
int usage(const char* exeName)
{
    DISPLAY (" \n");
    DISPLAY (" %s [Options] filename(s) \n", exeName);
    DISPLAY (" \n");
    DISPLAY ("Options : \n");
    DISPLAY ("-z : benchmark compression (default) \n");
    DISPLAY ("-d : benchmark decompression \n");
    DISPLAY ("-r : recursively load all files in subdirectories (default: off) \n");
    DISPLAY ("-B# : split input into blocks of size # (default: no split) \n");
    DISPLAY ("--blockSize=# : same as -B# \n");
    DISPLAY ("-# : use compression level # (default: %u) \n", CLEVEL_DEFAULT);
    DISPLAY ("--clevel=# : same as -# \n");
    DISPLAY ("-D # : use # as a dictionary (default: create one) \n");
    DISPLAY ("--dictionary=# : same as -D # \n");
    DISPLAY ("-i# : nb benchmark rounds (default: %u) \n", BENCH_TIME_DEFAULT_S);
    DISPLAY ("--nbBlocks=#: use # blocks for bench (default: one per file) \n");
    DISPLAY ("--nbDicts=# : create # dictionaries for bench (default: one per block) \n");
    DISPLAY ("--dedicated-dict-search : create compression dictionaries for dedicated dict search (default: off) \n");
    DISPLAY ("-h : help (this text) \n");
    return 0;
}
/* bad_usage() :
 * Reports an invalid command line, then displays the regular help.
 * @return : 1, so that main() can return it directly as an error code. */
int bad_usage(const char* exeName)
{
    DISPLAY (" bad usage : \n");
    (void)usage(exeName);
    return 1;
}
/* main() :
 * Parses the command line (see usage()), collects every non-option argument
 * as a file name, builds the list of files to load, then runs bench().
 * @return : the value returned by bench(), 0 after -h, or 1 when called without argument. */
int main (int argc, const char** argv)
{
int recursiveMode = 0;
int benchCompression = 1;
int dedicatedDictSearch = 0;
unsigned nbRounds = BENCH_TIME_DEFAULT_S;
const char* const exeName = argv[0];
if (argc < 2) return bad_usage(exeName);
/* sized for the worst case : every argument is a file name */
const char** nameTable = (const char**)malloc((size_t)argc * sizeof(const char*));
assert(nameTable != NULL);
unsigned nameIdx = 0;
const char* dictionary = NULL;
int cLevel = CLEVEL_DEFAULT;
size_t blockSize = BLOCKSIZE_DEFAULT;
unsigned nbDicts = 0; /* determine nbDicts automatically: 1 dictionary per block */
unsigned nbBlocks = 0; /* determine nbBlocks automatically, from source and blockSize */
/* Options are tested in order : exact matches first, then prefixes.
 * The bare "-" prefix test must remain last, since it matches any remaining
 * argument starting with '-' and reads it as a compression level. */
for (int argNb = 1; argNb < argc ; argNb++) {
const char* argument = argv[argNb];
if (!strcmp(argument, "-h")) { free(nameTable); return usage(exeName); }
if (!strcmp(argument, "-d")) { benchCompression = 0; continue; }
if (!strcmp(argument, "-z")) { benchCompression = 1; continue; }
if (!strcmp(argument, "-r")) { recursiveMode = 1; continue; }
/* -D consumes the following argument as the dictionary file name */
if (!strcmp(argument, "-D")) { argNb++; assert(argNb < argc); dictionary = argv[argNb]; continue; }
if (longCommandWArg(&argument, "-i")) { nbRounds = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--dictionary=")) { dictionary = argument; continue; }
if (longCommandWArg(&argument, "-B")) { blockSize = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--blockSize=")) { blockSize = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--nbDicts=")) { nbDicts = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--nbBlocks=")) { nbBlocks = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--clevel=")) { cLevel = (int)readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--dedicated-dict-search")) { dedicatedDictSearch = 1; continue; }
if (longCommandWArg(&argument, "-")) { cLevel = (int)readU32FromChar(&argument); continue; }
/* anything that's not a command is a filename */
nameTable[nameIdx++] = argument;
}
/* NOTE(review): filenameTable is dereferenced below without a NULL check - confirm both constructors cannot return NULL */
FileNamesTable* filenameTable;
if (recursiveMode) {
#ifndef UTIL_HAS_CREATEFILELIST
assert(0); /* missing capability, do not run */
#endif
/* nameTable is still freed at the end of main() on this path */
filenameTable = UTIL_createExpandedFNT(nameTable, nameIdx, 1 /* follow_links */);
} else {
filenameTable = UTIL_assembleFileNamesTable(nameTable, nameIdx, NULL);
nameTable = NULL; /* UTIL_assembleFileNamesTable() takes ownership of nameTable */
}
int result = bench(filenameTable->fileNames, (unsigned)filenameTable->tableSize, dictionary, blockSize, cLevel, nbDicts, nbBlocks, nbRounds, benchCompression, dedicatedDictSearch);
UTIL_freeFileNamesTable(filenameTable);
free(nameTable);
return result;
}

View file

@ -0,0 +1,4 @@
!lib/zstd
!lib/zstd/*
*.o
*.a

View file

@ -0,0 +1,95 @@
# ################################################################
# Copyright (c) Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
# LICENSE file in the root directory of this source tree) and the GPLv2 (found
# in the COPYING file in the root directory of this source tree).
# You may select, at your option, one of the above-listed licenses.
# ################################################################
# libzstd: regenerate the ./linux output tree from scratch.
# Runs ../freestanding_lib/freestanding.py over ../../lib with the include
# rewrites, sed substitutions and -D / -U / -R macro settings listed below,
# then moves the public headers under linux/include/linux and copies the
# kernel-specific files (linux_zstd.h, module sources, linux.mk) into place.
.PHONY: libzstd
libzstd:
rm -rf linux
mkdir -p linux
mkdir -p linux/include/linux
mkdir -p linux/lib/zstd
../freestanding_lib/freestanding.py \
--source-lib ../../lib \
--output-lib linux/lib/zstd \
--xxhash '<linux/xxhash.h>' \
--xxh64-state 'struct xxh64_state' \
--xxh64-prefix 'xxh64' \
--rewrite-include '<limits\.h>=<linux/limits.h>' \
--rewrite-include '<stddef\.h>=<linux/types.h>' \
--rewrite-include '"\.\./zstd.h"=<linux/zstd.h>' \
--rewrite-include '"(\.\./)?zstd_errors.h"=<linux/zstd_errors.h>' \
--sed 's,/\*\*\*,/* *,g' \
--sed 's,/\*\*,/*,g' \
-DZSTD_NO_INTRINSICS \
-DZSTD_NO_UNUSED_FUNCTIONS \
-DZSTD_LEGACY_SUPPORT=0 \
-DZSTD_STATIC_LINKING_ONLY \
-DFSE_STATIC_LINKING_ONLY \
-DHUF_STATIC_LINKING_ONLY \
-DXXH_STATIC_LINKING_ONLY \
-DMEM_FORCE_MEMORY_ACCESS=0 \
-D__GNUC__ \
-DSTATIC_BMI2=0 \
-DZSTD_ADDRESS_SANITIZER=0 \
-DZSTD_MEMORY_SANITIZER=0 \
-DZSTD_COMPRESS_HEAPMODE=1 \
-UZSTD_NO_INLINE \
-UNO_PREFETCH \
-U__cplusplus \
-UZSTD_DLL_EXPORT \
-UZSTD_DLL_IMPORT \
-U__ICCARM__ \
-UZSTD_MULTITHREAD \
-U_MSC_VER \
-U_WIN32 \
-RZSTDLIB_VISIBILITY= \
-RZSTDERRORLIB_VISIBILITY= \
-DZSTD_HAVE_WEAK_SYMBOLS=0 \
-DZSTD_TRACE=0 \
-DZSTD_NO_TRACE
mv linux/lib/zstd/zstd.h linux/include/linux/zstd_lib.h
mv linux/lib/zstd/zstd_errors.h linux/include/linux/
cp linux_zstd.h linux/include/linux/zstd.h
cp zstd_compress_module.c linux/lib/zstd
cp zstd_decompress_module.c linux/lib/zstd
cp decompress_sources.h linux/lib/zstd
cp linux.mk linux/lib/zstd/Makefile
# Location of the Linux kernel checkout to import into.
# Can be overridden on the command line : make import LINUX=/path/to/linux
LINUX ?= $(HOME)/repos/linux
# import: rebuild the generated tree (libzstd), then replace the zstd headers
# and the lib/zstd directory of the kernel checkout with the generated ones.
.PHONY: import
import: libzstd
rm -f $(LINUX)/include/linux/zstd.h
rm -f $(LINUX)/include/linux/zstd_errors.h
rm -rf $(LINUX)/lib/zstd
cp linux/include/linux/zstd.h $(LINUX)/include/linux
cp linux/include/linux/zstd_lib.h $(LINUX)/include/linux
cp linux/include/linux/zstd_errors.h $(LINUX)/include/linux
cp -r linux/lib/zstd $(LINUX)/lib
# import-upstream: copy the unmodified upstream sources (common, compress,
# decompress) into the kernel checkout, then remove the files that are not
# part of the kernel import (threading, pool, xxhash, zstdmt).
# Declared phony like every other target of this Makefile, so that a file or
# directory named `import-upstream` can never prevent the recipe from running.
.PHONY: import-upstream
import-upstream:
	rm -rf $(LINUX)/lib/zstd
	mkdir $(LINUX)/lib/zstd
	cp ../../lib/zstd.h $(LINUX)/include/linux/zstd_lib.h
	cp -r ../../lib/common $(LINUX)/lib/zstd
	cp -r ../../lib/compress $(LINUX)/lib/zstd
	cp -r ../../lib/decompress $(LINUX)/lib/zstd
	mv $(LINUX)/lib/zstd/zstd_errors.h $(LINUX)/include/linux
	rm $(LINUX)/lib/zstd/common/threading.*
	rm $(LINUX)/lib/zstd/common/pool.*
	rm $(LINUX)/lib/zstd/common/xxhash.*
	rm $(LINUX)/lib/zstd/compress/zstdmt_*
# test: rebuild the generated tree (libzstd), then run the `run-test` target
# of the Makefile located in ./test, with -O3 prepended to CFLAGS.
.PHONY: test
test: libzstd
$(MAKE) -C test run-test CFLAGS="-O3 $(CFLAGS)" -j
# clean: remove the generated ./linux tree.
.PHONY: clean
clean:
$(RM) -rf linux

View file

@ -0,0 +1,14 @@
# Zstd in the Linux Kernel
This directory contains the scripts needed to transform upstream zstd into the version imported into the kernel. All the transforms are automated and tested by our continuous integration.
## Upgrading Zstd in the Linux Kernel
1. `cd` into this directory.
2. Run `make libzstd` and read the output. Make sure that all the diffs printed and changes made by the script are correct.
3. Run `make test` and ensure that it passes.
4. Import zstd into the Linux Kernel with `make import LINUX=/path/to/linux/repo`.
5. Inspect the diff for sanity.
6. Check the Linux Kernel history for zstd. If any patches were made to the kernel version of zstd, but not to upstream zstd, then port them upstream if necessary.
7. Test the diff. Benchmark if necessary. Make sure to test multiple architectures: At least x86, i386, and arm.
8. Submit the patch to the LKML.

View file

@ -0,0 +1,104 @@
#!/bin/sh
# Stop at the first failing command.
set -e
# Benchmarks run on a Ubuntu 14.04 VM with 2 cores and 4 GiB of RAM.
# The VM is running on a Macbook Pro with a 3.1 GHz Intel Core i7 processor and
# 16 GB of RAM and an SSD.
# silesia is a directory that can be downloaded from
# http://mattmahoney.net/dc/silesia.html
# ls -l silesia/
# total 203M
# -rwxr-xr-x 1 terrelln 9.8M Apr 12 2002 dickens
# -rwxr-xr-x 1 terrelln 49M May 31 2002 mozilla
# -rwxr-xr-x 1 terrelln 9.6M Mar 20 2003 mr
# -rwxr-xr-x 1 terrelln 32M Apr 2 2002 nci
# -rwxr-xr-x 1 terrelln 5.9M Jul 4 2002 ooffice
# -rwxr-xr-x 1 terrelln 9.7M Apr 11 2002 osdb
# -rwxr-xr-x 1 terrelln 6.4M Apr 2 2002 reymont
# -rwxr-xr-x 1 terrelln 21M Mar 25 2002 samba
# -rwxr-xr-x 1 terrelln 7.0M Mar 24 2002 sao
# -rwxr-xr-x 1 terrelln 40M Mar 25 2002 webster
# -rwxr-xr-x 1 terrelln 8.1M Apr 4 2002 x-ray
# -rwxr-xr-x 1 terrelln 5.1M Nov 30 2000 xml
# $HOME is on a ext4 filesystem
BENCHMARK_DIR="$HOME/silesia/"
N=10
# All script arguments are forwarded to `mount` (e.g. -o compress-force=zstd).
# "$@" is quoted so that each argument is passed through exactly as given.
# Normalize the environment
sudo umount /mnt/btrfs 2> /dev/null > /dev/null || true
sudo mount -t btrfs "$@" /dev/sda3 /mnt/btrfs
sudo rm -rf /mnt/btrfs/*
sync
sudo umount /mnt/btrfs
sudo mount -t btrfs "$@" /dev/sda3 /mnt/btrfs
# Run the benchmark
echo "Compression"
time sh -c "for i in \$(seq $N); do sudo cp -r $BENCHMARK_DIR /mnt/btrfs/\$i; done; sync"
echo "Approximate compression ratio"
printf "%d / %d\n" \
$(df /mnt/btrfs --output=used -B 1 | tail -n 1) \
$(sudo du /mnt/btrfs -b -d 0 | tr '\t' '\n' | head -n 1);
# Unmount and remount to avoid any caching
sudo umount /mnt/btrfs
sudo mount -t btrfs "$@" /dev/sda3 /mnt/btrfs
echo "Decompression"
time sudo tar -c /mnt/btrfs 2> /dev/null | wc -c > /dev/null
sudo rm -rf /mnt/btrfs/*
sudo umount /mnt/btrfs
# Run for each of -o compress-force={none, lzo, zlib, zstd} 5 times and take the
# min time and ratio.
# Ran zstd with compression levels {1, 3, 6, 9, 12, 15}.
# Original size: 2119415342 B (using du /mnt/btrfs)
# none
# compress: 4.205 s
# decompress: 3.090 s
# ratio: 0.99
# lzo
# compress: 5.328 s
# decompress: 4.793 s
# ratio: 1.66
# zlib
# compress: 32.588 s
# decompress: 8.791 s
# ratio : 2.58
# zstd 1
# compress: 8.147 s
# decompress: 5.527 s
# ratio : 2.57
# zstd 3
# compress: 12.207 s
# decompress: 5.195 s
# ratio : 2.71
# zstd 6
# compress: 30.253 s
# decompress: 5.324 s
# ratio : 2.87
# zstd 9
# compress: 49.659 s
# decompress: 5.220 s
# ratio : 2.92
# zstd 12
# compress: 99.245 s
# decompress: 5.193 s
# ratio : 2.93
# zstd 15
# compress: 196.997 s
# decompress: 5.992 s
# ratio : 3.01

View file

@ -0,0 +1,99 @@
#!/bin/sh
# Stop at the first failing command.
set -e
# Benchmarks run on a Ubuntu 14.04 VM with 2 cores and 4 GiB of RAM.
# The VM is running on a Macbook Pro with a 3.1 GHz Intel Core i7 processor and
# 16 GB of RAM and an SSD.
# silesia is a directory that can be downloaded from
# http://mattmahoney.net/dc/silesia.html
# ls -l silesia/
# total 203M
# -rwxr-xr-x 1 terrelln 9.8M Apr 12 2002 dickens
# -rwxr-xr-x 1 terrelln 49M May 31 2002 mozilla
# -rwxr-xr-x 1 terrelln 9.6M Mar 20 2003 mr
# -rwxr-xr-x 1 terrelln 32M Apr 2 2002 nci
# -rwxr-xr-x 1 terrelln 5.9M Jul 4 2002 ooffice
# -rwxr-xr-x 1 terrelln 9.7M Apr 11 2002 osdb
# -rwxr-xr-x 1 terrelln 6.4M Apr 2 2002 reymont
# -rwxr-xr-x 1 terrelln 21M Mar 25 2002 samba
# -rwxr-xr-x 1 terrelln 7.0M Mar 24 2002 sao
# -rwxr-xr-x 1 terrelln 40M Mar 25 2002 webster
# -rwxr-xr-x 1 terrelln 8.1M Apr 4 2002 x-ray
# -rwxr-xr-x 1 terrelln 5.1M Nov 30 2000 xml
# $HOME is on a ext4 filesystem
BENCHMARK_FILE="linux-4.11.6.tar"
# Despite its name, BENCHMARK_DIR is the full path of the tarball to copy.
BENCHMARK_DIR="$HOME/$BENCHMARK_FILE"
# All script arguments are forwarded to `mount` (e.g. -o compress-force=zstd).
# "$@" is quoted so that each argument is passed through exactly as given.
# Normalize the environment
sudo umount /mnt/btrfs 2> /dev/null > /dev/null || true
sudo mount -t btrfs "$@" /dev/sda3 /mnt/btrfs
sudo rm -rf /mnt/btrfs/*
sync
sudo umount /mnt/btrfs
sudo mount -t btrfs "$@" /dev/sda3 /mnt/btrfs
# Run the benchmark
echo "Copy"
time sh -c "sudo cp -r $BENCHMARK_DIR /mnt/btrfs/$BENCHMARK_FILE && sync"
echo "Approximate tarred compression ratio"
printf "%d / %d\n" \
$(df /mnt/btrfs --output=used -B 1 | tail -n 1) \
$(sudo du /mnt/btrfs -b -d 0 | tr '\t' '\n' | head -n 1);
# Unmount and remount to avoid any caching
sudo umount /mnt/btrfs
sudo mount -t btrfs "$@" /dev/sda3 /mnt/btrfs
echo "Extract"
time sh -c "sudo tar -C /mnt/btrfs -xf /mnt/btrfs/$BENCHMARK_FILE && sync"
# Remove the tarball, leaving only the extracted data
sudo rm "/mnt/btrfs/$BENCHMARK_FILE"
# Unmount and remount to avoid any caching
sudo umount /mnt/btrfs
sudo mount -t btrfs "$@" /dev/sda3 /mnt/btrfs
echo "Approximate extracted compression ratio"
printf "%d / %d\n" \
$(df /mnt/btrfs --output=used -B 1 | tail -n 1) \
$(sudo du /mnt/btrfs -b -d 0 | tr '\t' '\n' | head -n 1);
echo "Read"
time sudo tar -c /mnt/btrfs 2> /dev/null | wc -c > /dev/null
sudo rm -rf /mnt/btrfs/*
sudo umount /mnt/btrfs
# Run for each of -o compress-force={none, lzo, zlib, zstd} 5 times and take the
# min time and ratio.
# none
# copy: 0.981 s
# extract: 5.501 s
# read: 8.807 s
# tarball ratio: 0.97
# extracted ratio: 0.78
# lzo
# copy: 1.631 s
# extract: 8.458 s
# read: 8.585 s
# tarball ratio: 2.06
# extracted ratio: 1.38
# zlib
# copy: 7.750 s
# extract: 21.544 s
# read: 11.744 s
# tarball ratio : 3.40
# extracted ratio: 1.86
# zstd 1
# copy: 2.579 s
# extract: 11.479 s
# read: 9.389 s
# tarball ratio : 3.57
# extracted ratio: 1.85

View file

@ -0,0 +1,28 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
/*
* This file includes every .c file needed for decompression.
* It is used by lib/decompress_unzstd.c to include the decompression
* source into the translation-unit, so it can be used for kernel
* decompression.
*/
#include "common/debug.c"
#include "common/entropy_common.c"
#include "common/error_private.c"
#include "common/fse_decompress.c"
#include "common/zstd_common.c"
#include "decompress/huf_decompress.c"
#include "decompress/zstd_ddict.c"
#include "decompress/zstd_decompress.c"
#include "decompress/zstd_decompress_block.c"
#include "zstd_decompress_module.c"

View file

@ -0,0 +1,46 @@
# SPDX-License-Identifier: GPL-2.0-only
# ################################################################
# Copyright (c) Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
# LICENSE file in the root directory of this source tree) and the GPLv2 (found
# in the COPYING file in the root directory of this source tree).
# You may select, at your option, one of the above-listed licenses.
# ################################################################
# Builds two objects, each enabled by its own configuration symbol :
# zstd_compress.o (CONFIG_ZSTD_COMPRESS) and zstd_decompress.o
# (CONFIG_ZSTD_DECOMPRESS). The `-y` lists below give the object files each
# one is composed of; the common/ objects appear in both lists.
# NOTE(review): presumably consumed by the kernel build system once installed
# as lib/zstd/Makefile - confirm.
obj-$(CONFIG_ZSTD_COMPRESS) += zstd_compress.o
obj-$(CONFIG_ZSTD_DECOMPRESS) += zstd_decompress.o
ccflags-y += -O3
zstd_compress-y := \
zstd_compress_module.o \
common/debug.o \
common/entropy_common.o \
common/error_private.o \
common/fse_decompress.o \
common/zstd_common.o \
compress/fse_compress.o \
compress/hist.o \
compress/huf_compress.o \
compress/zstd_compress.o \
compress/zstd_compress_literals.o \
compress/zstd_compress_sequences.o \
compress/zstd_compress_superblock.o \
compress/zstd_double_fast.o \
compress/zstd_fast.o \
compress/zstd_lazy.o \
compress/zstd_ldm.o \
compress/zstd_opt.o \
zstd_decompress-y := \
zstd_decompress_module.o \
common/debug.o \
common/entropy_common.o \
common/error_private.o \
common/fse_decompress.o \
common/zstd_common.o \
decompress/huf_decompress.o \
decompress/zstd_ddict.o \
decompress/zstd_decompress.o \
decompress/zstd_decompress_block.o \

View file

@ -0,0 +1,447 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of https://github.com/facebook/zstd) and
* the GPLv2 (found in the COPYING file in the root directory of
* https://github.com/facebook/zstd). You may select, at your option, one of the
* above-listed licenses.
*/
#ifndef LINUX_ZSTD_H
#define LINUX_ZSTD_H
/**
* This is a kernel-style API that wraps the upstream zstd API, which cannot be
* used directly because the symbols aren't exported. It exposes the minimal
* functionality which is currently required by users of zstd in the kernel.
* Expose extra functions from zstd_lib.h as needed.
*/
/* ====== Dependency ====== */
#include <linux/types.h>
#include <linux/zstd_errors.h>
#include <linux/zstd_lib.h>
/* ====== Helper Functions ====== */
/**
* zstd_compress_bound() - maximum compressed size in worst case scenario
* @src_size: The size of the data to compress.
*
* Return: The maximum compressed size in the worst case scenario.
*/
size_t zstd_compress_bound(size_t src_size);
/**
* zstd_is_error() - tells if a size_t function result is an error code
* @code: The function result to check for error.
*
* Return: Non-zero iff the code is an error.
*/
unsigned int zstd_is_error(size_t code);
/**
* enum zstd_error_code - zstd error codes
*/
typedef ZSTD_ErrorCode zstd_error_code;
/**
* zstd_get_error_code() - translates an error function result to an error code
* @code: The function result for which zstd_is_error(code) is true.
*
* Return: A unique error code for this error.
*/
zstd_error_code zstd_get_error_code(size_t code);
/**
* zstd_get_error_name() - translates an error function result to a string
* @code: The function result for which zstd_is_error(code) is true.
*
* Return: An error string corresponding to the error code.
*/
const char *zstd_get_error_name(size_t code);
/**
* zstd_min_clevel() - minimum allowed compression level
*
* Return: The minimum allowed compression level.
*/
int zstd_min_clevel(void);
/**
* zstd_max_clevel() - maximum allowed compression level
*
* Return: The maximum allowed compression level.
*/
int zstd_max_clevel(void);
/* ====== Parameter Selection ====== */
/**
* enum zstd_strategy - zstd compression search strategy
*
* From faster to stronger. See zstd_lib.h.
*/
typedef ZSTD_strategy zstd_strategy;
/**
* struct zstd_compression_parameters - zstd compression parameters
* @windowLog: Log of the largest match distance. Larger means more
* compression, and more memory needed during decompression.
* @chainLog: Fully searched segment. Larger means more compression,
* slower, and more memory (useless for fast).
* @hashLog: Dispatch table. Larger means more compression,
* slower, and more memory.
* @searchLog: Number of searches. Larger means more compression and slower.
* @searchLength: Match length searched. Larger means faster decompression,
* sometimes less compression.
* @targetLength: Acceptable match size for optimal parser (only). Larger means
* more compression, and slower.
* @strategy: The zstd compression strategy.
*
* See zstd_lib.h.
*/
typedef ZSTD_compressionParameters zstd_compression_parameters;
/**
* struct zstd_frame_parameters - zstd frame parameters
* @contentSizeFlag: Controls whether content size will be present in the
* frame header (when known).
* @checksumFlag: Controls whether a 32-bit checksum is generated at the
* end of the frame for error detection.
* @noDictIDFlag: Controls whether dictID will be saved into the frame
* header when using dictionary compression.
*
* The default value is all fields set to 0. See zstd_lib.h.
*/
typedef ZSTD_frameParameters zstd_frame_parameters;
/**
* struct zstd_parameters - zstd parameters
* @cParams: The compression parameters.
* @fParams: The frame parameters.
*/
typedef ZSTD_parameters zstd_parameters;
/**
* zstd_get_params() - returns zstd_parameters for selected level
* @level: The compression level
* @estimated_src_size: The estimated source size to compress or 0
* if unknown.
*
* Return: The selected zstd_parameters.
*/
zstd_parameters zstd_get_params(int level,
unsigned long long estimated_src_size);
/* ====== Single-pass Compression ====== */
typedef ZSTD_CCtx zstd_cctx;
/**
* zstd_cctx_workspace_bound() - max memory needed to initialize a zstd_cctx
* @parameters: The compression parameters to be used.
*
* If multiple compression parameters might be used, the caller must call
* zstd_cctx_workspace_bound() for each set of parameters and use the maximum
* size.
*
* Return: A lower bound on the size of the workspace that is passed to
* zstd_init_cctx().
*/
size_t zstd_cctx_workspace_bound(const zstd_compression_parameters *parameters);
/**
* zstd_init_cctx() - initialize a zstd compression context
* @workspace: The workspace to emplace the context into. It must outlive
* the returned context.
* @workspace_size: The size of workspace. Use zstd_cctx_workspace_bound() to
* determine how large the workspace must be.
*
* Return: A zstd compression context or NULL on error.
*/
zstd_cctx *zstd_init_cctx(void *workspace, size_t workspace_size);
/**
* zstd_compress_cctx() - compress src into dst with the initialized parameters
* @cctx: The context. Must have been initialized with zstd_init_cctx().
* @dst: The buffer to compress src into.
* @dst_capacity: The size of the destination buffer. May be any size, but
* zstd_compress_bound(src_size) is guaranteed to be large enough.
* @src: The data to compress.
* @src_size: The size of the data to compress.
* @parameters: The compression parameters to be used.
*
* Return: The compressed size or an error, which can be checked using
* zstd_is_error().
*/
size_t zstd_compress_cctx(zstd_cctx *cctx, void *dst, size_t dst_capacity,
const void *src, size_t src_size, const zstd_parameters *parameters);
/* ====== Single-pass Decompression ====== */
typedef ZSTD_DCtx zstd_dctx;
/**
* zstd_dctx_workspace_bound() - max memory needed to initialize a zstd_dctx
*
* Return: A lower bound on the size of the workspace that is passed to
* zstd_init_dctx().
*/
size_t zstd_dctx_workspace_bound(void);
/**
* zstd_init_dctx() - initialize a zstd decompression context
* @workspace: The workspace to emplace the context into. It must outlive
* the returned context.
* @workspace_size: The size of workspace. Use zstd_dctx_workspace_bound() to
* determine how large the workspace must be.
*
* Return: A zstd decompression context or NULL on error.
*/
zstd_dctx *zstd_init_dctx(void *workspace, size_t workspace_size);
/**
* zstd_decompress_dctx() - decompress zstd compressed src into dst
* @dctx: The decompression context.
* @dst: The buffer to decompress src into.
* @dst_capacity: The size of the destination buffer. Must be at least as large
* as the decompressed size. If the caller cannot upper bound the
* decompressed size, then it's better to use the streaming API.
* @src: The zstd compressed data to decompress. Multiple concatenated
* frames and skippable frames are allowed.
* @src_size: The exact size of the data to decompress.
*
* Return: The decompressed size or an error, which can be checked using
* zstd_is_error().
*/
size_t zstd_decompress_dctx(zstd_dctx *dctx, void *dst, size_t dst_capacity,
const void *src, size_t src_size);
/* ====== Streaming Buffers ====== */
/**
* struct zstd_in_buffer - input buffer for streaming
* @src: Start of the input buffer.
* @size: Size of the input buffer.
* @pos: Position where reading stopped. Will be updated.
* Necessarily 0 <= pos <= size.
*
* See zstd_lib.h.
*/
typedef ZSTD_inBuffer zstd_in_buffer;
/**
* struct zstd_out_buffer - output buffer for streaming
* @dst: Start of the output buffer.
* @size: Size of the output buffer.
* @pos: Position where writing stopped. Will be updated.
* Necessarily 0 <= pos <= size.
*
* See zstd_lib.h.
*/
typedef ZSTD_outBuffer zstd_out_buffer;
/* ====== Streaming Compression ====== */
typedef ZSTD_CStream zstd_cstream;
/**
* zstd_cstream_workspace_bound() - memory needed to initialize a zstd_cstream
* @cparams: The compression parameters to be used for compression.
*
* Return: A lower bound on the size of the workspace that is passed to
* zstd_init_cstream().
*/
size_t zstd_cstream_workspace_bound(const zstd_compression_parameters *cparams);
/**
* zstd_init_cstream() - initialize a zstd streaming compression context
* @parameters: The zstd parameters to use for compression.
* @pledged_src_size: If parameters->fParams.contentSizeFlag == 1 then the
* caller must pass the source size (zero means empty source).
* Otherwise, the caller may optionally pass the source
* size, or zero if unknown.
* @workspace: The workspace to emplace the context into. It must outlive
* the returned context.
* @workspace_size: The size of workspace.
* Use zstd_cstream_workspace_bound(&parameters->cParams) to
* determine how large the workspace must be.
*
* Return: The zstd streaming compression context or NULL on error.
*/
zstd_cstream *zstd_init_cstream(const zstd_parameters *parameters,
unsigned long long pledged_src_size, void *workspace, size_t workspace_size);
/**
* zstd_reset_cstream() - reset the context using parameters from creation
* @cstream: The zstd streaming compression context to reset.
* @pledged_src_size: Optionally the source size, or zero if unknown.
*
* Resets the context using the parameters from creation. Skips dictionary
* loading, since it can be reused. If `pledged_src_size` is non-zero the frame
* content size is always written into the frame header.
*
* Return: Zero or an error, which can be checked using
* zstd_is_error().
*/
size_t zstd_reset_cstream(zstd_cstream *cstream,
unsigned long long pledged_src_size);
/**
* zstd_compress_stream() - streaming compress some of input into output
* @cstream: The zstd streaming compression context.
* @output: Destination buffer. `output->pos` is updated to indicate how much
* compressed data was written.
* @input: Source buffer. `input->pos` is updated to indicate how much data
* was read. Note that it may not consume the entire input, in which
* case `input->pos < input->size`, and it's up to the caller to
* present remaining data again.
*
* The `input` and `output` buffers may be any size. Guaranteed to make some
* forward progress if `input` and `output` are not empty.
*
* Return: A hint for the number of bytes to use as the input for the next
* function call or an error, which can be checked using
* zstd_is_error().
*/
size_t zstd_compress_stream(zstd_cstream *cstream, zstd_out_buffer *output,
zstd_in_buffer *input);
/**
* zstd_flush_stream() - flush internal buffers into output
* @cstream: The zstd streaming compression context.
* @output: Destination buffer. `output->pos` is updated to indicate how much
* compressed data was written.
*
* zstd_flush_stream() must be called until it returns 0, meaning all the data
* has been flushed. Since zstd_flush_stream() causes a block to be ended,
* calling it too often will degrade the compression ratio.
*
* Return: The number of bytes still present within internal buffers or an
* error, which can be checked using zstd_is_error().
*/
size_t zstd_flush_stream(zstd_cstream *cstream, zstd_out_buffer *output);
/**
* zstd_end_stream() - flush internal buffers into output and end the frame
* @cstream: The zstd streaming compression context.
* @output: Destination buffer. `output->pos` is updated to indicate how much
* compressed data was written.
*
* zstd_end_stream() must be called until it returns 0, meaning all the data has
* been flushed and the frame epilogue has been written.
*
* Return: The number of bytes still present within internal buffers or an
* error, which can be checked using zstd_is_error().
*/
size_t zstd_end_stream(zstd_cstream *cstream, zstd_out_buffer *output);
/* ====== Streaming Decompression ====== */
typedef ZSTD_DStream zstd_dstream;
/**
* zstd_dstream_workspace_bound() - memory needed to initialize a zstd_dstream
* @max_window_size: The maximum window size allowed for compressed frames.
*
* Return: A lower bound on the size of the workspace that is passed
* to zstd_init_dstream().
*/
size_t zstd_dstream_workspace_bound(size_t max_window_size);
/**
* zstd_init_dstream() - initialize a zstd streaming decompression context
* @max_window_size: The maximum window size allowed for compressed frames.
* @workspace: The workspace to emplace the context into. It must outlive
* the returned context.
* @workspace_size: The size of workspace.
* Use zstd_dstream_workspace_bound(max_window_size) to
* determine how large the workspace must be.
*
* Return: The zstd streaming decompression context.
*/
zstd_dstream *zstd_init_dstream(size_t max_window_size, void *workspace,
size_t workspace_size);
/**
* zstd_reset_dstream() - reset the context using parameters from creation
* @dstream: The zstd streaming decompression context to reset.
*
* Resets the context using the parameters from creation. Skips dictionary
* loading, since it can be reused.
*
* Return: Zero or an error, which can be checked using zstd_is_error().
*/
size_t zstd_reset_dstream(zstd_dstream *dstream);
/**
* zstd_decompress_stream() - streaming decompress some of input into output
* @dstream: The zstd streaming decompression context.
* @output: Destination buffer. `output->pos` is updated to indicate how much
* decompressed data was written.
* @input: Source buffer. `input->pos` is updated to indicate how much data was
* read. Note that it may not consume the entire input, in which case
* `input->pos < input->size`, and it's up to the caller to present
* remaining data again.
*
* The `input` and `output` buffers may be any size. Guaranteed to make some
* forward progress if `input` and `output` are not empty.
* zstd_decompress_stream() will not consume the last byte of the frame until
* the entire frame is flushed.
*
* Return: Returns 0 iff a frame is completely decoded and fully flushed.
* Otherwise returns a hint for the number of bytes to use as the
* input for the next function call or an error, which can be checked
* using zstd_is_error(). The size hint will never load more than the
* frame.
*/
size_t zstd_decompress_stream(zstd_dstream *dstream, zstd_out_buffer *output,
zstd_in_buffer *input);
/* ====== Frame Inspection Functions ====== */
/**
* zstd_find_frame_compressed_size() - returns the size of a compressed frame
* @src: Source buffer. It should point to the start of a zstd encoded
* frame or a skippable frame.
* @src_size: The size of the source buffer. It must be at least as large as the
* size of the frame.
*
* Return: The compressed size of the frame pointed to by `src` or an error,
* which can be checked using zstd_is_error().
* Suitable to pass as the source size to zstd_decompress_dctx() or
* similar functions.
*/
size_t zstd_find_frame_compressed_size(const void *src, size_t src_size);
/**
* struct zstd_frame_header - zstd frame parameters stored in the frame header
* @frameContentSize: The frame content size, or ZSTD_CONTENTSIZE_UNKNOWN if not
* present.
* @windowSize: The window size, or 0 if the frame is a skippable frame.
* @blockSizeMax: The maximum block size.
* @frameType: The frame type (zstd or skippable)
* @headerSize: The size of the frame header.
* @dictID: The dictionary id, or 0 if not present.
* @checksumFlag: Whether a checksum was used.
*
* See zstd_lib.h.
*/
typedef ZSTD_frameHeader zstd_frame_header;
/**
* zstd_get_frame_header() - extracts parameters from a zstd or skippable frame
* @params: On success the frame parameters are written here.
* @src: The source buffer. It must point to a zstd or skippable frame.
* @src_size: The size of the source buffer.
*
* Return: 0 on success. If more data is required it returns how many bytes
* must be provided to make forward progress. Otherwise it returns
* an error, which can be checked using zstd_is_error().
*/
size_t zstd_get_frame_header(zstd_frame_header *params, const void *src,
size_t src_size);
#endif /* LINUX_ZSTD_H */

View file

@ -0,0 +1,259 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef MEM_H_MODULE
#define MEM_H_MODULE
/*-****************************************
* Dependencies
******************************************/
#include <asm/unaligned.h> /* get_unaligned, put_unaligned* */
#include <linux/compiler.h> /* inline */
#include <linux/swab.h> /* swab32, swab64 */
#include <linux/types.h> /* size_t, ptrdiff_t */
#include "debug.h" /* DEBUG_STATIC_ASSERT */
/*-****************************************
* Compiler specifics
******************************************/
#define MEM_STATIC static inline
/*-**************************************************************
* Basic Types
*****************************************************************/
typedef uint8_t BYTE;
typedef uint16_t U16;
typedef int16_t S16;
typedef uint32_t U32;
typedef int32_t S32;
typedef uint64_t U64;
typedef int64_t S64;
/*-**************************************************************
* Memory I/O API
*****************************************************************/
/*=== Static platform detection ===*/
MEM_STATIC unsigned MEM_32bits(void);
MEM_STATIC unsigned MEM_64bits(void);
MEM_STATIC unsigned MEM_isLittleEndian(void);
/*=== Native unaligned read/write ===*/
MEM_STATIC U16 MEM_read16(const void* memPtr);
MEM_STATIC U32 MEM_read32(const void* memPtr);
MEM_STATIC U64 MEM_read64(const void* memPtr);
MEM_STATIC size_t MEM_readST(const void* memPtr);
MEM_STATIC void MEM_write16(void* memPtr, U16 value);
MEM_STATIC void MEM_write32(void* memPtr, U32 value);
MEM_STATIC void MEM_write64(void* memPtr, U64 value);
/*=== Little endian unaligned read/write ===*/
MEM_STATIC U16 MEM_readLE16(const void* memPtr);
MEM_STATIC U32 MEM_readLE24(const void* memPtr);
MEM_STATIC U32 MEM_readLE32(const void* memPtr);
MEM_STATIC U64 MEM_readLE64(const void* memPtr);
MEM_STATIC size_t MEM_readLEST(const void* memPtr);
MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val);
MEM_STATIC void MEM_writeLE24(void* memPtr, U32 val);
MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32);
MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64);
MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val);
/*=== Big endian unaligned read/write ===*/
MEM_STATIC U32 MEM_readBE32(const void* memPtr);
MEM_STATIC U64 MEM_readBE64(const void* memPtr);
MEM_STATIC size_t MEM_readBEST(const void* memPtr);
MEM_STATIC void MEM_writeBE32(void* memPtr, U32 val32);
MEM_STATIC void MEM_writeBE64(void* memPtr, U64 val64);
MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val);
/*=== Byteswap ===*/
MEM_STATIC U32 MEM_swap32(U32 in);
MEM_STATIC U64 MEM_swap64(U64 in);
MEM_STATIC size_t MEM_swapST(size_t in);
/*-**************************************************************
* Memory I/O Implementation
*****************************************************************/
/* Reports whether size_t is 4 bytes wide in this build (1 if so, else 0). */
MEM_STATIC unsigned MEM_32bits(void)
{
	const unsigned is32 = (sizeof(size_t) == 4);

	return is32;
}

/* Reports whether size_t is 8 bytes wide in this build (1 if so, else 0). */
MEM_STATIC unsigned MEM_64bits(void)
{
	const unsigned is64 = (sizeof(size_t) == 8);

	return is64;
}

/* MEM_LITTLE_ENDIAN is 1 when __LITTLE_ENDIAN is defined here, 0 otherwise. */
#ifdef __LITTLE_ENDIAN
#define MEM_LITTLE_ENDIAN 1
#else
#define MEM_LITTLE_ENDIAN 0
#endif

/* Returns the compile-time MEM_LITTLE_ENDIAN decision as a function result. */
MEM_STATIC unsigned MEM_isLittleEndian(void)
{
	return MEM_LITTLE_ENDIAN;
}
/* Reads a native-order 16-bit value from memPtr via get_unaligned(). */
MEM_STATIC U16 MEM_read16(const void *memPtr)
{
	const U16 *const src = (const U16 *)memPtr;

	return get_unaligned(src);
}

/* Reads a native-order 32-bit value from memPtr via get_unaligned(). */
MEM_STATIC U32 MEM_read32(const void *memPtr)
{
	const U32 *const src = (const U32 *)memPtr;

	return get_unaligned(src);
}

/* Reads a native-order 64-bit value from memPtr via get_unaligned(). */
MEM_STATIC U64 MEM_read64(const void *memPtr)
{
	const U64 *const src = (const U64 *)memPtr;

	return get_unaligned(src);
}

/* Reads a native-order size_t from memPtr via get_unaligned(). */
MEM_STATIC size_t MEM_readST(const void *memPtr)
{
	const size_t *const src = (const size_t *)memPtr;

	return get_unaligned(src);
}

/* Stores a 16-bit value in native order at memPtr via put_unaligned(). */
MEM_STATIC void MEM_write16(void *memPtr, U16 value)
{
	U16 *const dst = (U16 *)memPtr;

	put_unaligned(value, dst);
}

/* Stores a 32-bit value in native order at memPtr via put_unaligned(). */
MEM_STATIC void MEM_write32(void *memPtr, U32 value)
{
	U32 *const dst = (U32 *)memPtr;

	put_unaligned(value, dst);
}

/* Stores a 64-bit value in native order at memPtr via put_unaligned(). */
MEM_STATIC void MEM_write64(void *memPtr, U64 value)
{
	U64 *const dst = (U64 *)memPtr;

	put_unaligned(value, dst);
}
/*=== Little endian r/w ===*/
/* Reads a little-endian 16-bit value from memPtr. */
MEM_STATIC U16 MEM_readLE16(const void *memPtr)
{
	const U16 value = get_unaligned_le16(memPtr);

	return value;
}

/* Stores val at memPtr as a little-endian 16-bit value. */
MEM_STATIC void MEM_writeLE16(void *memPtr, U16 val)
{
	put_unaligned_le16(val, memPtr);
}

/* Reads a little-endian 24-bit value: the low 16 bits plus the third byte. */
MEM_STATIC U32 MEM_readLE24(const void *memPtr)
{
	const BYTE *const bytes = (const BYTE *)memPtr;
	const U32 low16 = MEM_readLE16(memPtr);
	const U32 high8 = bytes[2];

	return low16 + (high8 << 16);
}

/* Stores the low 24 bits of val at memPtr in little-endian order. */
MEM_STATIC void MEM_writeLE24(void *memPtr, U32 val)
{
	BYTE *const bytes = (BYTE *)memPtr;
	const U16 low16 = (U16)val;
	const BYTE high8 = (BYTE)(val >> 16);

	MEM_writeLE16(memPtr, low16);
	bytes[2] = high8;
}

/* Reads a little-endian 32-bit value from memPtr. */
MEM_STATIC U32 MEM_readLE32(const void *memPtr)
{
	const U32 value = get_unaligned_le32(memPtr);

	return value;
}

/* Stores val32 at memPtr as a little-endian 32-bit value. */
MEM_STATIC void MEM_writeLE32(void *memPtr, U32 val32)
{
	put_unaligned_le32(val32, memPtr);
}

/* Reads a little-endian 64-bit value from memPtr. */
MEM_STATIC U64 MEM_readLE64(const void *memPtr)
{
	const U64 value = get_unaligned_le64(memPtr);

	return value;
}

/* Stores val64 at memPtr as a little-endian 64-bit value. */
MEM_STATIC void MEM_writeLE64(void *memPtr, U64 val64)
{
	put_unaligned_le64(val64, memPtr);
}

/* Reads a little-endian size_t: 32-bit when MEM_32bits(), else 64-bit. */
MEM_STATIC size_t MEM_readLEST(const void *memPtr)
{
	return MEM_32bits() ? (size_t)MEM_readLE32(memPtr)
			    : (size_t)MEM_readLE64(memPtr);
}

/* Stores a little-endian size_t: 32-bit when MEM_32bits(), else 64-bit. */
MEM_STATIC void MEM_writeLEST(void *memPtr, size_t val)
{
	if (!MEM_32bits()) {
		MEM_writeLE64(memPtr, (U64)val);
		return;
	}
	MEM_writeLE32(memPtr, (U32)val);
}
/*=== Big endian r/w ===*/
/* Reads a big-endian 32-bit value from memPtr. */
MEM_STATIC U32 MEM_readBE32(const void *memPtr)
{
	const U32 value = get_unaligned_be32(memPtr);

	return value;
}

/* Stores val32 at memPtr as a big-endian 32-bit value. */
MEM_STATIC void MEM_writeBE32(void *memPtr, U32 val32)
{
	put_unaligned_be32(val32, memPtr);
}

/* Reads a big-endian 64-bit value from memPtr. */
MEM_STATIC U64 MEM_readBE64(const void *memPtr)
{
	const U64 value = get_unaligned_be64(memPtr);

	return value;
}

/* Stores val64 at memPtr as a big-endian 64-bit value. */
MEM_STATIC void MEM_writeBE64(void *memPtr, U64 val64)
{
	put_unaligned_be64(val64, memPtr);
}

/* Reads a big-endian size_t: 32-bit when MEM_32bits(), else 64-bit. */
MEM_STATIC size_t MEM_readBEST(const void *memPtr)
{
	return MEM_32bits() ? (size_t)MEM_readBE32(memPtr)
			    : (size_t)MEM_readBE64(memPtr);
}

/* Stores a big-endian size_t: 32-bit when MEM_32bits(), else 64-bit. */
MEM_STATIC void MEM_writeBEST(void *memPtr, size_t val)
{
	if (!MEM_32bits()) {
		MEM_writeBE64(memPtr, (U64)val);
		return;
	}
	MEM_writeBE32(memPtr, (U32)val);
}
/* Returns `in` with its four bytes reversed (delegates to swab32). */
MEM_STATIC U32 MEM_swap32(U32 in)
{
	const U32 reversed = swab32(in);

	return reversed;
}

/* Returns `in` with its eight bytes reversed (delegates to swab64). */
MEM_STATIC U64 MEM_swap64(U64 in)
{
	const U64 reversed = swab64(in);

	return reversed;
}

/* Byte-swaps a size_t using the 32-bit or 64-bit swap per MEM_32bits(). */
MEM_STATIC size_t MEM_swapST(size_t in)
{
	return MEM_32bits() ? (size_t)MEM_swap32((U32)in)
			    : (size_t)MEM_swap64((U64)in);
}
#endif /* MEM_H_MODULE */

View file

@ -0,0 +1,39 @@
#!/bin/sh
# Benchmarks squashfs image creation (mksquashfs) and read-back (tar) of
# $BENCHMARK_DIR. Any script arguments are forwarded verbatim to mksquashfs.
set -e

# Benchmarks run on an Ubuntu 14.04 VM with 2 cores and 4 GiB of RAM.
# The VM is running on a Macbook Pro with a 3.1 GHz Intel Core i7 processor and
# 16 GB of RAM and an SSD.

# $BENCHMARK_DIR is generated with the following commands, from the Ubuntu image
# ubuntu-16.10-desktop-amd64.iso.
# > mkdir mnt
# > sudo mount -o loop ubuntu-16.10-desktop-amd64.iso mnt
# > cp mnt/casper/filesystem.squashfs .
# > sudo unsquashfs filesystem.squashfs

# $HOME is on a ext4 filesystem
BENCHMARK_DIR="$HOME/squashfs-root/"
BENCHMARK_FS="$HOME/filesystem.squashfs"

# Normalize the environment (failures are expected when nothing is left over)
sudo rm -f "$BENCHMARK_FS" 2> /dev/null > /dev/null || true
sudo umount /mnt/squashfs 2> /dev/null > /dev/null || true

# Run the benchmark
echo "Compression"
# "$*" is only for display; the real invocation uses "$@" so that arguments
# containing whitespace reach mksquashfs intact.
echo "sudo mksquashfs $BENCHMARK_DIR $BENCHMARK_FS $*"
time sudo mksquashfs "$BENCHMARK_DIR" "$BENCHMARK_FS" "$@" 2> /dev/null > /dev/null

echo "Approximate compression ratio"
printf "%d / %d\n" \
  "$(sudo du -sx --block-size=1 "$BENCHMARK_DIR" | cut -f1)" \
  "$(sudo du -sx --block-size=1 "$BENCHMARK_FS" | cut -f1)"

# Mount the filesystem
sudo mount -t squashfs "$BENCHMARK_FS" /mnt/squashfs

echo "Decompression"
time sudo tar -c /mnt/squashfs 2> /dev/null | wc -c > /dev/null

sudo umount /mnt/squashfs

View file

@ -0,0 +1,44 @@
# ################################################################
# Copyright (c) Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
# LICENSE file in the root directory of this source tree) and the GPLv2 (found
# in the COPYING file in the root directory of this source tree).
# You may select, at your option, one of the above-listed licenses.
# ################################################################
# Location of the kernel-style tree whose lib/zstd sources are built here.
LINUX := ../linux
LINUX_ZSTDLIB := $(LINUX)/lib/zstd

CPPFLAGS += -I$(LINUX)/include -I$(LINUX_ZSTDLIB) -Iinclude -DNDEBUG -Wno-deprecated-declarations
# Don't poison the workspace, it currently doesn't work with static allocation and workspace reuse
CPPFLAGS += -DZSTD_ASAN_DONT_POISON_WORKSPACE

# Every .c file under lib/zstd and its common/compress/decompress subdirectories.
LINUX_ZSTD_MODULE     := $(wildcard $(LINUX_ZSTDLIB)/*.c)
LINUX_ZSTD_COMMON     := $(wildcard $(LINUX_ZSTDLIB)/common/*.c)
LINUX_ZSTD_COMPRESS   := $(wildcard $(LINUX_ZSTDLIB)/compress/*.c)
LINUX_ZSTD_DECOMPRESS := $(wildcard $(LINUX_ZSTDLIB)/decompress/*.c)
LINUX_ZSTD_FILES      := $(LINUX_ZSTD_MODULE) $(LINUX_ZSTD_COMMON) $(LINUX_ZSTD_COMPRESS) $(LINUX_ZSTD_DECOMPRESS)
LINUX_ZSTD_OBJECTS    := $(LINUX_ZSTD_FILES:.c=.o)

# Static archive of all zstd objects.
liblinuxzstd.a: $(LINUX_ZSTD_OBJECTS)
	$(AR) $(ARFLAGS) $@ $^

# `test` and `static_test` are real output files, so they are not phony.
test: test.c liblinuxzstd.a
	$(CC) $(LDFLAGS) $(CPPFLAGS) $(CFLAGS) $^ -o $@

static_test: static_test.c
	$(CC) $(LDFLAGS) $(CPPFLAGS) $(CFLAGS) $^ -o $@

.PHONY: run-test
run-test: test static_test
	./macro-test.sh
	./test
	./static_test

# Declared phony so a stray file named `clean` cannot disable the rule.
.PHONY: clean
clean:
	$(RM) -f $(LINUX_ZSTDLIB)/*.o
	$(RM) -f $(LINUX_ZSTDLIB)/**/*.o
	$(RM) -f *.o *.a
	$(RM) -f test static_test

View file

@ -0,0 +1,186 @@
#ifndef ASM_UNALIGNED_H
#define ASM_UNALIGNED_H
#include <assert.h>
#include <linux/types.h>
/*
 * Byte-order detection: unless the build already supplies __LITTLE_ENDIAN,
 * define it when the compiler reports a little-endian target.
 */
#ifndef __LITTLE_ENDIAN
# if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) || defined(__LITTLE_ENDIAN__)
#  define __LITTLE_ENDIAN 1
# endif
#endif

/* The same decision expressed as a 0/1 constant usable in #if and in code. */
#ifdef __LITTLE_ENDIAN
# define _IS_LITTLE_ENDIAN 1
#else
# define _IS_LITTLE_ENDIAN 0
#endif

/*
 * Returns _IS_LITTLE_ENDIAN. When assertions are enabled it also checks that
 * the compile-time answer agrees with the in-memory layout of the value 1.
 */
static unsigned _isLittleEndian(void)
{
    const union { uint32_t u; uint8_t c[4]; } one = { 1 };
    assert(_IS_LITTLE_ENDIAN == one.c[0]);
    return _IS_LITTLE_ENDIAN;
}

/* Returns `in` with its two bytes exchanged. */
static uint16_t _swap16(uint16_t in)
{
    /* Full-byte masks: 0xF/0xF0 would only move nibbles and drop the rest. */
    return (uint16_t)(((in & 0x00FFu) << 8) | ((in & 0xFF00u) >> 8));
}

/* Returns `in` with its four bytes reversed. */
static uint32_t _swap32(uint32_t in)
{
    return __builtin_bswap32(in);
}

/* Returns `in` with its eight bytes reversed. */
static uint64_t _swap64(uint64_t in)
{
    return __builtin_bswap64(in);
}

/*
 * Little endian accessors.
 *
 * Each getter copies the bytes with memcpy (so memPtr needs no alignment) and
 * byte-swaps the loaded value when the host order differs. The swap result
 * must be assigned back: the _swapNN() helpers return the swapped value and
 * do not modify their argument.
 */
static uint16_t get_unaligned_le16(const void* memPtr)
{
    uint16_t val;
    __builtin_memcpy(&val, memPtr, sizeof(val));
    if (!_isLittleEndian()) val = _swap16(val);
    return val;
}

static uint32_t get_unaligned_le32(const void* memPtr)
{
    uint32_t val;
    __builtin_memcpy(&val, memPtr, sizeof(val));
    if (!_isLittleEndian()) val = _swap32(val);
    return val;
}

static uint64_t get_unaligned_le64(const void* memPtr)
{
    uint64_t val;
    __builtin_memcpy(&val, memPtr, sizeof(val));
    if (!_isLittleEndian()) val = _swap64(val);
    return val;
}

/* Setters swap first (when needed) and then copy the bytes out. */
static void put_unaligned_le16(uint16_t value, void* memPtr)
{
    if (!_isLittleEndian()) value = _swap16(value);
    __builtin_memcpy(memPtr, &value, sizeof(value));
}

static void put_unaligned_le32(uint32_t value, void* memPtr)
{
    if (!_isLittleEndian()) value = _swap32(value);
    __builtin_memcpy(memPtr, &value, sizeof(value));
}

static void put_unaligned_le64(uint64_t value, void* memPtr)
{
    if (!_isLittleEndian()) value = _swap64(value);
    __builtin_memcpy(memPtr, &value, sizeof(value));
}

/*
 * Big endian accessors. Same scheme with the condition inverted: swap when
 * the host IS little endian. The 16-bit variants exist because the generic
 * __get_unaligned_be()/__put_unaligned_be() macros reference them.
 */
static uint16_t get_unaligned_be16(const void* memPtr)
{
    uint16_t val;
    __builtin_memcpy(&val, memPtr, sizeof(val));
    if (_isLittleEndian()) val = _swap16(val);
    return val;
}

static uint32_t get_unaligned_be32(const void* memPtr)
{
    uint32_t val;
    __builtin_memcpy(&val, memPtr, sizeof(val));
    if (_isLittleEndian()) val = _swap32(val);
    return val;
}

static uint64_t get_unaligned_be64(const void* memPtr)
{
    uint64_t val;
    __builtin_memcpy(&val, memPtr, sizeof(val));
    if (_isLittleEndian()) val = _swap64(val);
    return val;
}

static void put_unaligned_be16(uint16_t value, void* memPtr)
{
    if (_isLittleEndian()) value = _swap16(value);
    __builtin_memcpy(memPtr, &value, sizeof(value));
}

static void put_unaligned_be32(uint32_t value, void* memPtr)
{
    if (_isLittleEndian()) value = _swap32(value);
    __builtin_memcpy(memPtr, &value, sizeof(value));
}

static void put_unaligned_be64(uint64_t value, void* memPtr)
{
    if (_isLittleEndian()) value = _swap64(value);
    __builtin_memcpy(memPtr, &value, sizeof(value));
}
/* generic */
extern void __bad_unaligned_access_size(void);
/*
 * __get_unaligned_le(ptr) - size-dispatched little-endian load.
 *
 * Uses __builtin_choose_expr on sizeof(*(ptr)) to pick, at compile time, a
 * plain dereference for 1-byte objects or get_unaligned_le16/32/64 for 2, 4
 * and 8 bytes. For any other size the chosen expression is a call to
 * __bad_unaligned_access_size(), which is only declared in this header.
 * The result is cast back to the pointee type of ptr.
 */
#define __get_unaligned_le(ptr) ((typeof(*(ptr)))({ \
__builtin_choose_expr(sizeof(*(ptr)) == 1, *(ptr), \
__builtin_choose_expr(sizeof(*(ptr)) == 2, get_unaligned_le16((ptr)), \
__builtin_choose_expr(sizeof(*(ptr)) == 4, get_unaligned_le32((ptr)), \
__builtin_choose_expr(sizeof(*(ptr)) == 8, get_unaligned_le64((ptr)), \
__bad_unaligned_access_size())))); \
}))
/*
 * __get_unaligned_be(ptr) - size-dispatched big-endian load.
 *
 * Mirrors __get_unaligned_le(): a plain dereference for 1-byte objects and
 * get_unaligned_be16/32/64 for 2, 4 and 8 bytes, selected at compile time
 * from sizeof(*(ptr)). Any other size selects a call to
 * __bad_unaligned_access_size(). The result is cast to the pointee type.
 */
#define __get_unaligned_be(ptr) ((typeof(*(ptr)))({ \
__builtin_choose_expr(sizeof(*(ptr)) == 1, *(ptr), \
__builtin_choose_expr(sizeof(*(ptr)) == 2, get_unaligned_be16((ptr)), \
__builtin_choose_expr(sizeof(*(ptr)) == 4, get_unaligned_be32((ptr)), \
__builtin_choose_expr(sizeof(*(ptr)) == 8, get_unaligned_be64((ptr)), \
__bad_unaligned_access_size())))); \
}))
/*
 * __put_unaligned_le(val, ptr) - size-dispatched little-endian store.
 *
 * Switches on sizeof(*(ptr)): 1-byte objects are written directly, 2/4/8-byte
 * objects go through put_unaligned_le16/32/64 with val cast to the matching
 * unsigned width. Any other size calls __bad_unaligned_access_size().
 * ptr is evaluated once into __gu_p (sizeof does not evaluate its operand);
 * the statement expression yields (void)0, so the macro has no value.
 */
#define __put_unaligned_le(val, ptr) \
({ \
void *__gu_p = (ptr); \
switch (sizeof(*(ptr))) { \
case 1: \
*(uint8_t *)__gu_p = (uint8_t)(val); \
break; \
case 2: \
put_unaligned_le16((uint16_t)(val), __gu_p); \
break; \
case 4: \
put_unaligned_le32((uint32_t)(val), __gu_p); \
break; \
case 8: \
put_unaligned_le64((uint64_t)(val), __gu_p); \
break; \
default: \
__bad_unaligned_access_size(); \
break; \
} \
(void)0; \
})
/*
 * __put_unaligned_be(val, ptr) - size-dispatched big-endian store.
 *
 * Mirrors __put_unaligned_le(): 1-byte objects are written directly, 2/4/8-byte
 * objects go through put_unaligned_be16/32/64 with val cast to the matching
 * unsigned width, and any other size calls __bad_unaligned_access_size().
 * The statement expression yields (void)0, so the macro has no value.
 */
#define __put_unaligned_be(val, ptr) \
({ \
void *__gu_p = (ptr); \
switch (sizeof(*(ptr))) { \
case 1: \
*(uint8_t *)__gu_p = (uint8_t)(val); \
break; \
case 2: \
put_unaligned_be16((uint16_t)(val), __gu_p); \
break; \
case 4: \
put_unaligned_be32((uint32_t)(val), __gu_p); \
break; \
case 8: \
put_unaligned_be64((uint64_t)(val), __gu_p); \
break; \
default: \
__bad_unaligned_access_size(); \
break; \
} \
(void)0; \
})
/*
 * get_unaligned()/put_unaligned() access values in host byte order, so they
 * alias the little-endian helpers when _IS_LITTLE_ENDIAN is 1 and the
 * big-endian helpers otherwise.
 */
#if _IS_LITTLE_ENDIAN
# define get_unaligned __get_unaligned_le
# define put_unaligned __put_unaligned_le
#else
# define get_unaligned __get_unaligned_be
# define put_unaligned __put_unaligned_be
#endif
#endif // ASM_UNALIGNED_H

View file

@ -0,0 +1,21 @@
/*
* Copyright (c) 2016-2021, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef LINUX_COMPILER_H
#define LINUX_COMPILER_H
/*
 * Unless the build already defines `inline`, expand it to __inline carrying
 * the `unused` attribute, which marks functions declared with it as allowed
 * to go uncalled.
 */
#ifndef inline
#define inline __inline __attribute__((unused))
#endif
/* Unless already defined, `noinline` maps to the compiler's noinline attribute. */
#ifndef noinline
#define noinline __attribute__((noinline))
#endif
#endif

View file

@ -0,0 +1,15 @@
/*
* Copyright (c) 2016-2021, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef LINUX_ERRNO_H
#define LINUX_ERRNO_H
/* Local definition of EINVAL for code that includes this header instead of
 * the system <errno.h>. NOTE(review): 22 presumably mirrors the Linux value;
 * confirm before relying on it alongside the host errno.h. */
#define EINVAL 22
#endif

View file

@ -0,0 +1,19 @@
/*
* Copyright (c) 2016-2021, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef LINUX_KERNEL_H
#define LINUX_KERNEL_H
/* Stub: expands to nothing, so the condition x is never evaluated. */
#define WARN_ON(x)
/*
 * PTR_ALIGN(p, a) - round pointer p up to the next multiple of a and return
 * it with p's own type. The whole expansion is parenthesized so it can be
 * used safely inside larger expressions.
 */
#define PTR_ALIGN(p, a) ((__typeof__(p))ALIGN((unsigned long long)(p), (a)))
/*
 * ALIGN(x, a) - round x up to the next multiple of a. The mask arithmetic only
 * yields a multiple of a when a is a power of two.
 * a is converted to the type of x before the mask is built: with a narrower
 * unsigned a (e.g. 8u against a 64-bit x) the complemented mask would be
 * zero-extended and would clear the upper bits of x.
 */
#define ALIGN(x, a) ALIGN_MASK((x), (__typeof__(x))(a) - 1)
/* ALIGN_MASK(x, mask) - add mask to x, then clear the bits set in mask. */
#define ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask))
#endif

View file

@ -0,0 +1,15 @@
/*
* Copyright (c) 2016-2021, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef LINUX_LIMITS_H
#define LINUX_LIMITS_H
#include <limits.h>
#endif

View file

@ -0,0 +1,15 @@
/*
* Copyright (c) 2016-2021, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef LINUX_MATH64_H
#define LINUX_MATH64_H
/* div_u64(dividend, divisor) - expands to the plain C `/` operator applied to
 * the two arguments; each argument is evaluated exactly once. */
#define div_u64(dividend, divisor) ((dividend) / (divisor))
#endif

View file

@ -0,0 +1,18 @@
/*
* Copyright (c) 2016-2021, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef LINUX_MODULE_H
#define LINUX_MODULE_H
/*
 * EXPORT_SYMBOL(symbol) - defines a file-scope `void *` variable named
 * __<symbol> initialized with `symbol`. The caller supplies the trailing
 * semicolon.
 */
#define EXPORT_SYMBOL(symbol) \
void* __##symbol = symbol
/* MODULE_LICENSE() and MODULE_DESCRIPTION() expand to nothing in this header. */
#define MODULE_LICENSE(license)
#define MODULE_DESCRIPTION(description)
#endif

View file

@ -0,0 +1,15 @@
/*
* Copyright (c) 2016-2021, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef LINUX_PRINTK_H
#define LINUX_PRINTK_H
/* pr_debug(...) expands to nothing: arguments are discarded, never evaluated. */
#define pr_debug(...)
#endif

View file

@ -0,0 +1,15 @@
/*
* Copyright (c) 2016-2021, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef LINUX_STDDEF_H
#define LINUX_STDDEF_H
#include <stddef.h>
#endif

View file

@ -0,0 +1,16 @@
/*
* Copyright (c) 2016-2021, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef LINUX_SWAB_H
#define LINUX_SWAB_H
/* Byte-order reversal helpers, mapped directly onto the compiler builtins. */
#define swab32(value) __builtin_bswap32((value))
#define swab64(value) __builtin_bswap64((value))
#endif

View file

@ -0,0 +1,16 @@
/*
* Copyright (c) 2016-2021, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef LINUX_TYPES_H
#define LINUX_TYPES_H
#include <stddef.h>
#include <stdint.h>
#endif

View file

@ -0,0 +1,746 @@
/*
* xxHash - Extremely Fast Hash algorithm
* Copyright (C) 2012-2016, Yann Collet.
*
* BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following disclaimer
* in the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* This program is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License version 2 as published by the
* Free Software Foundation. This program is dual-licensed; you may select
* either version 2 of the GNU General Public License ("GPL") or BSD license
* ("BSD").
*
* You can contact the author at:
* - xxHash homepage: https://cyan4973.github.io/xxHash/
* - xxHash source repository: https://github.com/Cyan4973/xxHash
*/
/*
* Notice extracted from xxHash homepage:
*
* xxHash is an extremely fast Hash algorithm, running at RAM speed limits.
* It also successfully passes all tests from the SMHasher suite.
*
* Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2
* Duo @3GHz)
*
* Name Speed Q.Score Author
* xxHash 5.4 GB/s 10
* CrapWow 3.2 GB/s 2 Andrew
* MurmurHash 3a 2.7 GB/s 10 Austin Appleby
* SpookyHash 2.0 GB/s 10 Bob Jenkins
* SBox 1.4 GB/s 9 Bret Mulvey
* Lookup3 1.2 GB/s 9 Bob Jenkins
* SuperFastHash 1.2 GB/s 1 Paul Hsieh
* CityHash64 1.05 GB/s 10 Pike & Alakuijala
* FNV 0.55 GB/s 5 Fowler, Noll, Vo
* CRC32 0.43 GB/s 9
* MD5-32 0.33 GB/s 10 Ronald L. Rivest
* SHA1-32 0.28 GB/s 10
*
* Q.Score is a measure of quality of the hash function.
* It depends on successfully passing SMHasher test set.
* 10 is a perfect score.
*
* A 64-bits version, named xxh64 offers much better speed,
* but for 64-bits applications only.
* Name Speed on 64 bits Speed on 32 bits
* xxh64 13.8 GB/s 1.9 GB/s
* xxh32 6.8 GB/s 6.0 GB/s
*/
#ifndef XXHASH_H
#define XXHASH_H
#include <linux/types.h>
/*
 * Linkage prefix applied to every xxhash function declared and defined in this
 * header: each one is a static inline carrying the GCC `unused` attribute.
 */
#define XXH_API static inline __attribute__((unused))
/*-****************************
* Simple Hash Functions
*****************************/
/**
* xxh32() - calculate the 32-bit hash of the input with a given seed.
*
* @input: The data to hash.
* @length: The length of the data to hash.
* @seed: The seed can be used to alter the result predictably.
*
* Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s
*
* Return: The 32-bit hash of the data.
*/
XXH_API uint32_t xxh32(const void *input, size_t length, uint32_t seed);
/**
* xxh64() - calculate the 64-bit hash of the input with a given seed.
*
* @input: The data to hash.
* @length: The length of the data to hash.
* @seed: The seed can be used to alter the result predictably.
*
* This function runs 2x faster on 64-bit systems, but slower on 32-bit systems.
*
* Return: The 64-bit hash of the data.
*/
XXH_API uint64_t xxh64(const void *input, size_t length, uint64_t seed);
/**
* xxhash() - calculate wordsize hash of the input with a given seed
* @input: The data to hash.
* @length: The length of the data to hash.
* @seed: The seed can be used to alter the result predictably.
*
* If the hash does not need to be comparable between machines with
* different word sizes, this function will call whichever of xxh32()
* or xxh64() is faster.
*
* Return: wordsize hash of the data.
*/
static inline unsigned long xxhash(const void *input, size_t length,
				   uint64_t seed)
{
	/*
	 * Only a BITS_PER_LONG of exactly 64 selects the 64-bit hash; any other
	 * value takes the xxh32() path. An undefined BITS_PER_LONG evaluates
	 * to 0 in the preprocessor and therefore also takes the xxh32() path.
	 */
#if BITS_PER_LONG != 64
	return xxh32(input, length, seed);
#else
	return xxh64(input, length, seed);
#endif
}
/*-****************************
* Streaming Hash Functions
*****************************/
/*
* These definitions are only meant to allow allocation of XXH state
* statically, on stack, or in a struct for example.
* Do not use members directly.
*/
/**
* struct xxh32_state - private xxh32 state, do not use members directly
*/
struct xxh32_state {
/* Running byte count; xxh32_update() adds each length truncated to 32 bits. */
uint32_t total_len_32;
/* Non-zero once an update was >= 16 bytes or the running total reached 16. */
uint32_t large_len;
/* Four accumulator lanes, seeded by xxh32_reset(). */
uint32_t v1;
uint32_t v2;
uint32_t v3;
uint32_t v4;
/* Holding area for input bytes that do not yet fill a 16-byte stripe. */
uint32_t mem32[4];
/* Number of valid bytes currently stored in mem32. */
uint32_t memsize;
};
/**
* struct xxh64_state - private xxh64 state, do not use members directly
*/
struct xxh64_state {
/* Total number of bytes passed to xxh64_update() since the last reset. */
uint64_t total_len;
/* Four accumulator lanes, seeded by xxh64_reset(). */
uint64_t v1;
uint64_t v2;
uint64_t v3;
uint64_t v4;
/* Holding area for input bytes that do not yet fill a 32-byte stripe. */
uint64_t mem64[4];
/* Number of valid bytes currently stored in mem64. */
uint32_t memsize;
};
/**
* xxh32_reset() - reset the xxh32 state to start a new hashing operation
*
* @state: The xxh32 state to reset.
* @seed: Initialize the hash state with this seed.
*
* Call this function on any xxh32_state to prepare for a new hashing operation.
*/
XXH_API void xxh32_reset(struct xxh32_state *state, uint32_t seed);
/**
* xxh32_update() - hash the data given and update the xxh32 state
*
* @state: The xxh32 state to update.
* @input: The data to hash.
* @length: The length of the data to hash.
*
* After calling xxh32_reset() call xxh32_update() as many times as necessary.
*
* Return: Zero on success, otherwise an error code.
*/
XXH_API int xxh32_update(struct xxh32_state *state, const void *input, size_t length);
/**
* xxh32_digest() - produce the current xxh32 hash
*
* @state: Produce the current xxh32 hash of this state.
*
* A hash value can be produced at any time. It is still possible to continue
* inserting input into the hash state after a call to xxh32_digest(), and
* generate new hashes later on, by calling xxh32_digest() again.
*
* Return: The xxh32 hash stored in the state.
*/
XXH_API uint32_t xxh32_digest(const struct xxh32_state *state);
/**
* xxh64_reset() - reset the xxh64 state to start a new hashing operation
*
* @state: The xxh64 state to reset.
* @seed: Initialize the hash state with this seed.
*/
XXH_API void xxh64_reset(struct xxh64_state *state, uint64_t seed);
/**
* xxh64_update() - hash the data given and update the xxh64 state
* @state: The xxh64 state to update.
* @input: The data to hash.
* @length: The length of the data to hash.
*
* After calling xxh64_reset() call xxh64_update() as many times as necessary.
*
* Return: Zero on success, otherwise an error code.
*/
XXH_API int xxh64_update(struct xxh64_state *state, const void *input, size_t length);
/**
* xxh64_digest() - produce the current xxh64 hash
*
* @state: Produce the current xxh64 hash of this state.
*
* A hash value can be produced at any time. It is still possible to continue
* inserting input into the hash state after a call to xxh64_digest(), and
* generate new hashes later on, by calling xxh64_digest() again.
*
* Return: The xxh64 hash stored in the state.
*/
XXH_API uint64_t xxh64_digest(const struct xxh64_state *state);
/*-**************************
* Utils
***************************/
/**
* xxh32_copy_state() - copy the source state into the destination state
*
* @src: The source xxh32 state.
* @dst: The destination xxh32 state.
*/
XXH_API void xxh32_copy_state(struct xxh32_state *dst, const struct xxh32_state *src);
/**
* xxh64_copy_state() - copy the source state into the destination state
*
* @src: The source xxh64 state.
* @dst: The destination xxh64 state.
*/
XXH_API void xxh64_copy_state(struct xxh64_state *dst, const struct xxh64_state *src);
/*
* xxHash - Extremely Fast Hash algorithm
* Copyright (C) 2012-2016, Yann Collet.
*
* BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following disclaimer
* in the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* This program is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License version 2 as published by the
* Free Software Foundation. This program is dual-licensed; you may select
* either version 2 of the GNU General Public License ("GPL") or BSD license
* ("BSD").
*
* You can contact the author at:
* - xxHash homepage: https://cyan4973.github.io/xxHash/
* - xxHash source repository: https://github.com/Cyan4973/xxHash
*/
#include <asm/unaligned.h>
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/xxhash.h>
/*-*************************************
* Macros
**************************************/
/*
 * Rotate x left by r bits (32- and 64-bit variants).
 *
 * Both arguments are fully parenthesized so that compound expressions such as
 * xxh_rotl32(a | b, n + 1) rotate the whole operand by the whole count.
 * Each argument is evaluated twice, so do not pass expressions with side
 * effects. r must be in 1..31 (1..63 for the 64-bit form); r == 0 would shift
 * by the full type width, which is undefined.
 */
#define xxh_rotl32(x, r) (((x) << (r)) | ((x) >> (32 - (r))))
#define xxh_rotl64(x, r) (((x) << (r)) | ((x) >> (64 - (r))))
/* 1 when the build defines __LITTLE_ENDIAN, 0 otherwise. */
#if !defined(__LITTLE_ENDIAN)
# define XXH_CPU_LITTLE_ENDIAN 0
#else
# define XXH_CPU_LITTLE_ENDIAN 1
#endif
/*-*************************************
* Constants
**************************************/
/* 32-bit mixing multipliers (same values as before, spelled in hexadecimal). */
static const uint32_t PRIME32_1 = 0x9E3779B1U;
static const uint32_t PRIME32_2 = 0x85EBCA77U;
static const uint32_t PRIME32_3 = 0xC2B2AE3DU;
static const uint32_t PRIME32_4 = 0x27D4EB2FU;
static const uint32_t PRIME32_5 = 0x165667B1U;
/* 64-bit mixing multipliers. */
static const uint64_t PRIME64_1 = 0x9E3779B185EBCA87ULL;
static const uint64_t PRIME64_2 = 0xC2B2AE3D27D4EB4FULL;
static const uint64_t PRIME64_3 = 0x165667B19E3779F9ULL;
static const uint64_t PRIME64_4 = 0x85EBCA77C2B2AE63ULL;
static const uint64_t PRIME64_5 = 0x27D4EB2F165667C5ULL;
/*-**************************
* Utils
***************************/
/* Copies the complete xxh32 state from src into dst with a single byte-wise copy. */
XXH_API void xxh32_copy_state(struct xxh32_state *dst, const struct xxh32_state *src)
{
/* memcpy semantics: dst and src must not overlap. */
__builtin_memcpy(dst, src, sizeof(*dst));
}
/* Copies the complete xxh64 state from src into dst with a single byte-wise copy. */
XXH_API void xxh64_copy_state(struct xxh64_state *dst, const struct xxh64_state *src)
{
/* memcpy semantics: dst and src must not overlap. */
__builtin_memcpy(dst, src, sizeof(*dst));
}
/*-***************************
* Simple Hash Functions
****************************/
/*
 * One xxh32 lane step: fold a 32-bit input word into the accumulator,
 * rotate left by 13 and multiply by PRIME32_1.
 */
static uint32_t xxh32_round(uint32_t seed, const uint32_t input)
{
	const uint32_t mixed = seed + input * PRIME32_2;

	return xxh_rotl32(mixed, 13) * PRIME32_1;
}
/*
 * xxh32() - one-shot 32-bit hash of @len bytes at @input, perturbed by @seed.
 *
 * Progress is tracked with a remaining-byte counter instead of comparing
 * against an end pointer. The previous form evaluated expressions such as
 * "p + 4 <= b_end", which creates a pointer beyond one-past-the-end of the
 * buffer whenever fewer than four bytes are left, and computed "input + len"
 * even for a NULL/zero-length input. Neither pointer is formed any more; the
 * produced hash values are unchanged.
 */
XXH_API uint32_t xxh32(const void *input, const size_t len, const uint32_t seed)
{
	const uint8_t *p = (const uint8_t *)input;
	size_t remaining = len;
	uint32_t h32;

	if (remaining >= 16) {
		uint32_t v1 = seed + PRIME32_1 + PRIME32_2;
		uint32_t v2 = seed + PRIME32_2;
		uint32_t v3 = seed + 0;
		uint32_t v4 = seed - PRIME32_1;

		/* Consume the input in 16-byte stripes, one word per lane. */
		do {
			v1 = xxh32_round(v1, get_unaligned_le32(p));
			p += 4;
			v2 = xxh32_round(v2, get_unaligned_le32(p));
			p += 4;
			v3 = xxh32_round(v3, get_unaligned_le32(p));
			p += 4;
			v4 = xxh32_round(v4, get_unaligned_le32(p));
			p += 4;
			remaining -= 16;
		} while (remaining >= 16);

		h32 = xxh_rotl32(v1, 1) + xxh_rotl32(v2, 7) +
			xxh_rotl32(v3, 12) + xxh_rotl32(v4, 18);
	} else {
		/* Short input: the lanes are never used. */
		h32 = seed + PRIME32_5;
	}

	h32 += (uint32_t)len;

	/* Tail: whole 32-bit words first, then single bytes. */
	while (remaining >= 4) {
		h32 += get_unaligned_le32(p) * PRIME32_3;
		h32 = xxh_rotl32(h32, 17) * PRIME32_4;
		p += 4;
		remaining -= 4;
	}

	while (remaining > 0) {
		h32 += (*p) * PRIME32_5;
		h32 = xxh_rotl32(h32, 11) * PRIME32_1;
		p++;
		remaining--;
	}

	/* Final avalanche. */
	h32 ^= h32 >> 15;
	h32 *= PRIME32_2;
	h32 ^= h32 >> 13;
	h32 *= PRIME32_3;
	h32 ^= h32 >> 16;

	return h32;
}
/*
 * One xxh64 lane step: fold a 64-bit input word into the accumulator,
 * rotate left by 31 and multiply by PRIME64_1.
 */
static uint64_t xxh64_round(uint64_t acc, const uint64_t input)
{
	const uint64_t mixed = acc + input * PRIME64_2;

	return xxh_rotl64(mixed, 31) * PRIME64_1;
}
/*
 * Folds one finished lane value into the combined hash: the lane is passed
 * through xxh64_round() with a zero accumulator, XORed in, and the result is
 * scaled by PRIME64_1 and offset by PRIME64_4.
 */
static uint64_t xxh64_merge_round(uint64_t acc, uint64_t val)
{
	const uint64_t folded = acc ^ xxh64_round(0, val);

	return folded * PRIME64_1 + PRIME64_4;
}
/*
 * xxh64() - one-shot 64-bit hash of @len bytes at @input, perturbed by @seed.
 *
 * Progress is tracked with a remaining-byte counter instead of comparing
 * against an end pointer. The previous form evaluated "p + 8 <= b_end" and
 * "p + 4 <= b_end", which creates a pointer beyond one-past-the-end of the
 * buffer when fewer bytes are left, and computed "input + len" even for a
 * NULL/zero-length input. Neither pointer is formed any more; the produced
 * hash values are unchanged.
 */
XXH_API uint64_t xxh64(const void *input, const size_t len, const uint64_t seed)
{
	const uint8_t *p = (const uint8_t *)input;
	size_t remaining = len;
	uint64_t h64;

	if (remaining >= 32) {
		uint64_t v1 = seed + PRIME64_1 + PRIME64_2;
		uint64_t v2 = seed + PRIME64_2;
		uint64_t v3 = seed + 0;
		uint64_t v4 = seed - PRIME64_1;

		/* Consume the input in 32-byte stripes, one word per lane. */
		do {
			v1 = xxh64_round(v1, get_unaligned_le64(p));
			p += 8;
			v2 = xxh64_round(v2, get_unaligned_le64(p));
			p += 8;
			v3 = xxh64_round(v3, get_unaligned_le64(p));
			p += 8;
			v4 = xxh64_round(v4, get_unaligned_le64(p));
			p += 8;
			remaining -= 32;
		} while (remaining >= 32);

		h64 = xxh_rotl64(v1, 1) + xxh_rotl64(v2, 7) +
			xxh_rotl64(v3, 12) + xxh_rotl64(v4, 18);
		h64 = xxh64_merge_round(h64, v1);
		h64 = xxh64_merge_round(h64, v2);
		h64 = xxh64_merge_round(h64, v3);
		h64 = xxh64_merge_round(h64, v4);
	} else {
		/* Short input: the lanes are never used. */
		h64 = seed + PRIME64_5;
	}

	h64 += (uint64_t)len;

	/* Tail: 64-bit words, then at most one 32-bit word, then bytes. */
	while (remaining >= 8) {
		const uint64_t k1 = xxh64_round(0, get_unaligned_le64(p));

		h64 ^= k1;
		h64 = xxh_rotl64(h64, 27) * PRIME64_1 + PRIME64_4;
		p += 8;
		remaining -= 8;
	}

	if (remaining >= 4) {
		h64 ^= (uint64_t)(get_unaligned_le32(p)) * PRIME64_1;
		h64 = xxh_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
		p += 4;
		remaining -= 4;
	}

	while (remaining > 0) {
		h64 ^= (*p) * PRIME64_5;
		h64 = xxh_rotl64(h64, 11) * PRIME64_1;
		p++;
		remaining--;
	}

	/* Final avalanche. */
	h64 ^= h64 >> 33;
	h64 *= PRIME64_2;
	h64 ^= h64 >> 29;
	h64 *= PRIME64_3;
	h64 ^= h64 >> 32;

	return h64;
}
/*-**************************************************
* Advanced Hash Functions
***************************************************/
XXH_API void xxh32_reset(struct xxh32_state *statePtr, const uint32_t seed)
{
/* use a local state for memcpy() to avoid strict-aliasing warnings */
struct xxh32_state state;
__builtin_memset(&state, 0, sizeof(state));
state.v1 = seed + PRIME32_1 + PRIME32_2;
state.v2 = seed + PRIME32_2;
state.v3 = seed + 0;
state.v4 = seed - PRIME32_1;
__builtin_memcpy(statePtr, &state, sizeof(state));
}
XXH_API void xxh64_reset(struct xxh64_state *statePtr, const uint64_t seed)
{
/* use a local state for memcpy() to avoid strict-aliasing warnings */
struct xxh64_state state;
__builtin_memset(&state, 0, sizeof(state));
state.v1 = seed + PRIME64_1 + PRIME64_2;
state.v2 = seed + PRIME64_2;
state.v3 = seed + 0;
state.v4 = seed - PRIME64_1;
__builtin_memcpy(statePtr, &state, sizeof(state));
}
/*
 * xxh32_update() - feed @len more bytes from @input into the streaming state.
 *
 * Returns 0 on success or -EINVAL when @input is NULL.
 *
 * Input that does not complete a 16-byte stripe is parked in state->mem32 and
 * consumed by a later call (or by xxh32_digest()).
 *
 * Progress is tracked with a remaining-byte counter. The previous version
 * computed "input + len" before the NULL check and tested "p <= b_end - 16",
 * which forms a pointer in front of the caller's buffer whenever fewer than
 * 16 bytes are left after topping up the stripe buffer. No out-of-range
 * pointer is formed any more; the resulting state is unchanged.
 */
XXH_API int xxh32_update(struct xxh32_state *state, const void *input, const size_t len)
{
	const uint8_t *p = (const uint8_t *)input;
	size_t remaining = len;

	if (input == NULL)
		return -EINVAL;

	state->total_len_32 += (uint32_t)len;
	state->large_len |= (len >= 16) | (state->total_len_32 >= 16);

	if (state->memsize + len < 16) { /* fill in tmp buffer */
		__builtin_memcpy((uint8_t *)(state->mem32) + state->memsize, input, len);
		state->memsize += (uint32_t)len;
		return 0;
	}

	if (state->memsize) { /* some data left from previous update */
		const uint32_t *p32 = state->mem32;
		/* Bytes needed to complete the buffered stripe; never more than len here. */
		const uint32_t fill = 16 - state->memsize;

		__builtin_memcpy((uint8_t *)(state->mem32) + state->memsize, input, fill);

		state->v1 = xxh32_round(state->v1, get_unaligned_le32(p32));
		p32++;
		state->v2 = xxh32_round(state->v2, get_unaligned_le32(p32));
		p32++;
		state->v3 = xxh32_round(state->v3, get_unaligned_le32(p32));
		p32++;
		state->v4 = xxh32_round(state->v4, get_unaligned_le32(p32));

		p += fill;
		remaining -= fill;
		state->memsize = 0;
	}

	if (remaining >= 16) {
		uint32_t v1 = state->v1;
		uint32_t v2 = state->v2;
		uint32_t v3 = state->v3;
		uint32_t v4 = state->v4;

		/* Consume whole 16-byte stripes straight from the caller's buffer. */
		do {
			v1 = xxh32_round(v1, get_unaligned_le32(p));
			p += 4;
			v2 = xxh32_round(v2, get_unaligned_le32(p));
			p += 4;
			v3 = xxh32_round(v3, get_unaligned_le32(p));
			p += 4;
			v4 = xxh32_round(v4, get_unaligned_le32(p));
			p += 4;
			remaining -= 16;
		} while (remaining >= 16);

		state->v1 = v1;
		state->v2 = v2;
		state->v3 = v3;
		state->v4 = v4;
	}

	if (remaining > 0) {
		/* Park the leftover (< 16 bytes) for the next call. */
		__builtin_memcpy(state->mem32, p, remaining);
		state->memsize = (uint32_t)remaining;
	}

	return 0;
}
/*
 * Computes the xxh32 hash of everything fed into the state so far.
 * The state itself is not modified, so hashing can continue afterwards.
 */
XXH_API uint32_t xxh32_digest(const struct xxh32_state *state)
{
	const uint8_t *tail = (const uint8_t *)state->mem32;
	uint32_t pending = state->memsize;
	uint32_t hash;

	if (!state->large_len) {
		hash = state->v3 /* == seed */ + PRIME32_5;
	} else {
		hash = xxh_rotl32(state->v1, 1) + xxh_rotl32(state->v2, 7) +
			xxh_rotl32(state->v3, 12) + xxh_rotl32(state->v4, 18);
	}

	hash += state->total_len_32;

	/* Buffered tail: whole 32-bit words first, then single bytes. */
	for (; pending >= 4; tail += 4, pending -= 4) {
		hash += get_unaligned_le32(tail) * PRIME32_3;
		hash = xxh_rotl32(hash, 17) * PRIME32_4;
	}

	for (; pending > 0; tail++, pending--) {
		hash += (*tail) * PRIME32_5;
		hash = xxh_rotl32(hash, 11) * PRIME32_1;
	}

	/* Final avalanche. */
	hash ^= hash >> 15;
	hash *= PRIME32_2;
	hash ^= hash >> 13;
	hash *= PRIME32_3;
	hash ^= hash >> 16;

	return hash;
}
/*
 * xxh64_update() - feed @len more bytes from @input into the streaming state.
 *
 * Returns 0 on success or -EINVAL when @input is NULL.
 *
 * Input that does not complete a 32-byte stripe is parked in state->mem64 and
 * consumed by a later call (or by xxh64_digest()).
 *
 * Progress is tracked with a remaining-byte counter. The previous version
 * computed "input + len" before the NULL check and tested "p + 32 <= b_end",
 * which forms a pointer beyond one-past-the-end of the caller's buffer when
 * fewer than 32 bytes are left. No out-of-range pointer is formed any more;
 * the resulting state is unchanged.
 */
XXH_API int xxh64_update(struct xxh64_state *state, const void *input, const size_t len)
{
	const uint8_t *p = (const uint8_t *)input;
	size_t remaining = len;

	if (input == NULL)
		return -EINVAL;

	state->total_len += len;

	if (state->memsize + len < 32) { /* fill in tmp buffer */
		__builtin_memcpy(((uint8_t *)state->mem64) + state->memsize, input, len);
		state->memsize += (uint32_t)len;
		return 0;
	}

	if (state->memsize) { /* complete and consume the buffered stripe */
		uint64_t *p64 = state->mem64;
		/* Bytes needed to complete the buffered stripe; never more than len here. */
		const uint32_t fill = 32 - state->memsize;

		__builtin_memcpy(((uint8_t *)p64) + state->memsize, input, fill);

		state->v1 = xxh64_round(state->v1, get_unaligned_le64(p64));
		p64++;
		state->v2 = xxh64_round(state->v2, get_unaligned_le64(p64));
		p64++;
		state->v3 = xxh64_round(state->v3, get_unaligned_le64(p64));
		p64++;
		state->v4 = xxh64_round(state->v4, get_unaligned_le64(p64));

		p += fill;
		remaining -= fill;
		state->memsize = 0;
	}

	if (remaining >= 32) {
		uint64_t v1 = state->v1;
		uint64_t v2 = state->v2;
		uint64_t v3 = state->v3;
		uint64_t v4 = state->v4;

		/* Consume whole 32-byte stripes straight from the caller's buffer. */
		do {
			v1 = xxh64_round(v1, get_unaligned_le64(p));
			p += 8;
			v2 = xxh64_round(v2, get_unaligned_le64(p));
			p += 8;
			v3 = xxh64_round(v3, get_unaligned_le64(p));
			p += 8;
			v4 = xxh64_round(v4, get_unaligned_le64(p));
			p += 8;
			remaining -= 32;
		} while (remaining >= 32);

		state->v1 = v1;
		state->v2 = v2;
		state->v3 = v3;
		state->v4 = v4;
	}

	if (remaining > 0) {
		/* Park the leftover (< 32 bytes) for the next call. */
		__builtin_memcpy(state->mem64, p, remaining);
		state->memsize = (uint32_t)remaining;
	}

	return 0;
}
/*
 * Computes the xxh64 hash of everything fed into the state so far.
 * The state itself is not modified, so hashing can continue afterwards.
 */
XXH_API uint64_t xxh64_digest(const struct xxh64_state *state)
{
	const uint8_t *tail = (const uint8_t *)state->mem64;
	uint32_t pending = state->memsize;
	uint64_t hash;

	if (state->total_len < 32) {
		/* No full stripe was ever consumed; v3 still holds the seed. */
		hash = state->v3 + PRIME64_5;
	} else {
		const uint64_t lane1 = state->v1;
		const uint64_t lane2 = state->v2;
		const uint64_t lane3 = state->v3;
		const uint64_t lane4 = state->v4;

		hash = xxh_rotl64(lane1, 1) + xxh_rotl64(lane2, 7) +
			xxh_rotl64(lane3, 12) + xxh_rotl64(lane4, 18);
		hash = xxh64_merge_round(hash, lane1);
		hash = xxh64_merge_round(hash, lane2);
		hash = xxh64_merge_round(hash, lane3);
		hash = xxh64_merge_round(hash, lane4);
	}

	hash += (uint64_t)state->total_len;

	/* Buffered tail: 64-bit words, then at most one 32-bit word, then bytes. */
	for (; pending >= 8; tail += 8, pending -= 8) {
		const uint64_t word = xxh64_round(0, get_unaligned_le64(tail));

		hash ^= word;
		hash = xxh_rotl64(hash, 27) * PRIME64_1 + PRIME64_4;
	}

	if (pending >= 4) {
		hash ^= (uint64_t)(get_unaligned_le32(tail)) * PRIME64_1;
		hash = xxh_rotl64(hash, 23) * PRIME64_2 + PRIME64_3;
		tail += 4;
		pending -= 4;
	}

	for (; pending > 0; tail++, pending--) {
		hash ^= (*tail) * PRIME64_5;
		hash = xxh_rotl64(hash, 11) * PRIME64_1;
	}

	/* Final avalanche. */
	hash ^= hash >> 33;
	hash *= PRIME64_2;
	hash ^= hash >> 29;
	hash *= PRIME64_3;
	hash ^= hash >> 32;

	return hash;
}
#endif /* XXHASH_H */

View file

@ -0,0 +1,44 @@
#!/usr/bin/env sh
# Exit as soon as a command fails outside of a condition or an &&/|| list.
set -e
# Absolute path of the directory that contains this script.
SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd)
# The two trees that are searched for leftover macros, relative to this script.
INCLUDE_DIR="$SCRIPT_DIR/../linux/include"
LIB_DIR="$SCRIPT_DIR/../linux/lib"
# Writes all arguments, joined by single spaces, to stdout with backslash
# escapes expanded and no trailing newline.
print() {
    printf '%b' "$*"
}
# Same as print, but terminates the output with a newline.
println() {
    printf '%b\n' "$*"
}
# Prints the given message on stderr and terminates the script with status 1.
die() {
    println "$@" >&2
    exit 1
}
# test_not_present PATTERN
#
# Fails the script (via die) when PATTERN occurs anywhere below INCLUDE_DIR or
# LIB_DIR; any matching lines are printed by grep first.
#
# The pattern is quoted and passed with -e so that it is handed to grep as one
# argument, is not subject to word splitting or globbing, and is never parsed
# as an option.
#
# NOTE(review): a grep error (exit status > 1, e.g. a missing directory) is
# still reported as "Okay", because only status 0 triggers die.
test_not_present() {
    print "Testing that '$1' is not present... "
    grep -r -e "$1" "$INCLUDE_DIR" "$LIB_DIR" && die "Fail!"
    println "Okay"
}
# Explain what the test is for, then check each macro in turn.
println "This test checks that the macro removal process worked as expected"
println "If this test fails, then freestanding.py wasn't able to remove one of these"
println "macros from the source code completely. You'll either need to rewrite the check"
println "or improve freestanding.py."
println ""

for macro in \
    "ZSTD_NO_INTRINSICS" \
    "ZSTD_NO_UNUSED_FUNCTIONS" \
    "ZSTD_LEGACY_SUPPORT" \
    "STATIC_BMI2" \
    "ZSTD_NO_INLINE" \
    "ZSTD_DLL_EXPORT" \
    "ZSTD_DLL_IMPORT" \
    "__ICCARM__" \
    "_MSC_VER" \
    "_WIN32"
do
    test_not_present "$macro"
done

View file

@ -0,0 +1,50 @@
/*
* Copyright (c) Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "decompress_sources.h"
#include <linux/zstd.h>
/*
 * CONTROL(x) - test assertion that stays active in every build.
 *
 * When x evaluates to false, prints "<function>:<line>: <expression> failed!"
 * to stderr and aborts the process.
 *
 * Uses the standard __func__ identifier and prints __LINE__ (an int) with %d;
 * the emitted text is the same as before.
 */
#define CONTROL(x) \
  do { \
    if (!(x)) { \
      fprintf(stderr, "%s:%d: %s failed!\n", __func__, __LINE__, #x); \
      abort(); \
    } \
  } while (0)
/*
 * A 13-byte zstd frame that the test below expects to decompress to zero
 * bytes. Stored as unsigned char because several bytes (0xb5, 0xfd, 0x99, ...)
 * do not fit in a signed char.
 */
static const unsigned char kEmptyZstdFrame[] = {
    0x28, 0xb5, 0x2f, 0xfd, 0x24, 0x00, 0x01, 0x00, 0x00, 0x99, 0xe9, 0xd8, 0x51
};
/*
 * Decompresses kEmptyZstdFrame with a statically allocated decompression
 * context and checks that the call succeeds and produces zero bytes.
 * Any failed check aborts the process through CONTROL().
 */
static void test_decompress_unzstd() {
fprintf(stderr, "Testing decompress unzstd... ");
{
/* The context lives entirely inside a caller-provided workspace. */
size_t const wkspSize = zstd_dctx_workspace_bound();
void* wksp = malloc(wkspSize);
CONTROL(wksp != NULL);
ZSTD_DCtx* dctx = zstd_init_dctx(wksp, wkspSize);
CONTROL(dctx != NULL);
/* No destination buffer is needed: the frame is expected to hold no content. */
size_t const dSize = zstd_decompress_dctx(dctx, NULL, 0, kEmptyZstdFrame, sizeof(kEmptyZstdFrame));
CONTROL(!zstd_is_error(dSize));
CONTROL(dSize == 0);
free(wksp);
}
fprintf(stderr, "Ok\n");
}
/* Runs the single decompression test; a failing check aborts before the return. */
int main(void) {
test_decompress_unzstd();
return 0;
}

View file

@ -0,0 +1,219 @@
/*
* Copyright (c) Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <linux/zstd.h>
/*
 * CONTROL(x) - test assertion that stays active in every build.
 *
 * When x evaluates to false, prints "<function>:<line>: <expression> failed!"
 * to stderr and aborts the process.
 *
 * Uses the standard __func__ identifier and prints __LINE__ (an int) with %d;
 * the emitted text is the same as before.
 */
#define CONTROL(x) \
  do { \
    if (!(x)) { \
      fprintf(stderr, "%s:%d: %s failed!\n", __func__, __LINE__, #x); \
      abort(); \
    } \
  } while (0)
/* Buffers shared by all tests; allocated by create_test_data(), released by free_test_data(). */
typedef struct {
/* Source data; create_test_data() fills it with zero bytes. */
char *data;
/* Destination for decompressed output, compared against data. */
char *data2;
/* Size in bytes of both data and data2. */
size_t dataSize;
/* Destination for compressed output. */
char *comp;
/* Capacity of comp, taken from zstd_compress_bound(dataSize). */
size_t compSize;
} test_data_t;
/*
 * Allocates the three test buffers (128 KiB source, 128 KiB round-trip
 * destination, and a compression buffer sized by zstd_compress_bound()) and
 * zero-fills the source. Aborts through CONTROL() if an allocation fails.
 * The caller releases the buffers with free_test_data().
 */
test_data_t create_test_data(void) {
test_data_t data;
data.dataSize = 128 * 1024;
data.data = malloc(data.dataSize);
CONTROL(data.data != NULL);
data.data2 = malloc(data.dataSize);
CONTROL(data.data2 != NULL);
data.compSize = zstd_compress_bound(data.dataSize);
data.comp = malloc(data.compSize);
CONTROL(data.comp != NULL);
/* Only the source is initialized; data2 and comp are written by the tests. */
memset(data.data, 0, data.dataSize);
return data;
}
/* Releases every buffer owned by *data; the struct itself is left untouched. */
static void free_test_data(test_data_t const *data) {
  char *const buffers[] = { data->data, data->data2, data->comp };
  size_t const count = sizeof(buffers) / sizeof(buffers[0]);
  for (size_t i = 0; i < count; ++i) {
    free(buffers[i]);
  }
}
/* Smaller / larger of two values. Each argument may be evaluated twice. */
#define MIN(lhs, rhs) ((lhs) < (rhs) ? (lhs) : (rhs))
#define MAX(lhs, rhs) ((lhs) > (rhs) ? (lhs) : (rhs))
/*
 * Round-trips the test data through the streaming compression and
 * decompression APIs for every level from -1 to 15, feeding input and output
 * in chunks of at most 4096 bytes and using one shared, caller-allocated
 * workspace for both directions. Any failed check aborts through CONTROL().
 *
 * The end of the compressed data is taken from the last output chunk's write
 * position. The previous "op += out.pos" ran after op had already been
 * advanced past the whole chunk, which overstated the compressed size by
 * out.size bytes and handed unwritten bytes to the decompressor as input.
 */
static void test_btrfs(test_data_t const *data) {
  fprintf(stderr, "testing btrfs use cases... ");
  size_t const size = MIN(data->dataSize, 128 * 1024);
  for (int level = -1; level < 16; ++level) {
    zstd_parameters params = zstd_get_params(level, size);
    CONTROL(params.cParams.windowLog <= 17);
    /* One workspace, large enough for whichever stream type needs more. */
    size_t const workspaceSize =
        MAX(zstd_cstream_workspace_bound(&params.cParams),
            zstd_dstream_workspace_bound(size));
    void *workspace = malloc(workspaceSize);
    CONTROL(workspace != NULL);

    char const *ip = data->data;
    char const *iend = ip + size;
    char *op = data->comp;
    char *oend = op + data->compSize;
    {
      zstd_cstream *cctx = zstd_init_cstream(&params, size, workspace, workspaceSize);
      CONTROL(cctx != NULL);
      zstd_out_buffer out = {NULL, 0, 0};
      zstd_in_buffer in = {NULL, 0, 0};
      for (;;) {
        /* Hand out the next input chunk once the current one is consumed. */
        if (in.pos == in.size) {
          in.src = ip;
          in.size = MIN(4096, iend - ip);
          in.pos = 0;
          ip += in.size;
        }

        /* Hand out the next output chunk once the current one is full. */
        if (out.pos == out.size) {
          out.dst = op;
          out.size = MIN(4096, oend - op);
          out.pos = 0;
          op += out.size;
        }

        if (ip != iend || in.pos < in.size) {
          CONTROL(!zstd_is_error(zstd_compress_stream(cctx, &out, &in)));
        } else {
          size_t const ret = zstd_end_stream(cctx, &out);
          CONTROL(!zstd_is_error(ret));
          if (ret == 0) {
            break;
          }
        }
      }
      /* The frame ends at the write position inside the current chunk. */
      op = (char *)out.dst + out.pos;
    }

    ip = data->comp;
    iend = op;
    op = data->data2;
    oend = op + size;
    {
      zstd_dstream *dctx = zstd_init_dstream(1ULL << params.cParams.windowLog, workspace, workspaceSize);
      CONTROL(dctx != NULL);
      zstd_out_buffer out = {NULL, 0, 0};
      zstd_in_buffer in = {NULL, 0, 0};
      for (;;) {
        if (in.pos == in.size) {
          in.src = ip;
          in.size = MIN(4096, iend - ip);
          in.pos = 0;
          ip += in.size;
        }

        if (out.pos == out.size) {
          out.dst = op;
          out.size = MIN(4096, oend - op);
          out.pos = 0;
          op += out.size;
        }

        size_t const ret = zstd_decompress_stream(dctx, &out, &in);
        CONTROL(!zstd_is_error(ret));
        if (ret == 0) {
          break;
        }
      }
    }
    CONTROL(op - data->data2 == data->dataSize);
    CONTROL(!memcmp(data->data, data->data2, data->dataSize));
    free(workspace);
  }
  fprintf(stderr, "Ok\n");
}
/*
 * One-shot round trip: compresses data->data into data->comp with the
 * parameters returned by zstd_get_params(19, 0), decompresses the result into
 * data->data2 and checks size and content. Each direction uses its own
 * malloc'ed workspace. Any failed check aborts through CONTROL().
 */
static void test_decompress_unzstd(test_data_t const *data) {
fprintf(stderr, "Testing decompress unzstd... ");
size_t cSize;
{
/* Compression half. */
zstd_parameters params = zstd_get_params(19, 0);
size_t const wkspSize = zstd_cctx_workspace_bound(&params.cParams);
void* wksp = malloc(wkspSize);
CONTROL(wksp != NULL);
zstd_cctx* cctx = zstd_init_cctx(wksp, wkspSize);
CONTROL(cctx != NULL);
cSize = zstd_compress_cctx(cctx, data->comp, data->compSize, data->data, data->dataSize, &params);
CONTROL(!zstd_is_error(cSize));
free(wksp);
}
{
/* Decompression half; cSize from above is the compressed length. */
size_t const wkspSize = zstd_dctx_workspace_bound();
void* wksp = malloc(wkspSize);
CONTROL(wksp != NULL);
zstd_dctx* dctx = zstd_init_dctx(wksp, wkspSize);
CONTROL(dctx != NULL);
size_t const dSize = zstd_decompress_dctx(dctx, data->data2, data->dataSize, data->comp, cSize);
CONTROL(!zstd_is_error(dSize));
CONTROL(dSize == data->dataSize);
CONTROL(!memcmp(data->data, data->data2, data->dataSize));
free(wksp);
}
fprintf(stderr, "Ok\n");
}
/*
 * Checks the compression-level bounds: the minimum level must be negative
 * and the maximum level must be exactly 22.
 */
static void test_f2fs() {
fprintf(stderr, "testing f2fs uses... ");
CONTROL(zstd_min_clevel() < 0);
CONTROL(zstd_max_clevel() == 22);
fprintf(stderr, "Ok\n");
}
/* Start of the 8192-byte region painted by set_stack() and inspected by check_stack(). */
static char *g_stack = NULL;
/*
 * Optimization barrier: the empty asm statement names x as a read/write
 * register operand, so the compiler must treat the pointer as used.
 */
static void __attribute__((noinline)) use(void *x) {
asm volatile("" : "+r"(x));
}
/*
 * Paints an 8192-byte local array with 0x33 and remembers its address in
 * g_stack. The pointer is deliberately kept after this function returns:
 * check_stack() later inspects the same memory to see how much of it was
 * overwritten by the calls made in between.
 */
static void __attribute__((noinline)) set_stack() {
char stack[8192];
g_stack = stack;
memset(g_stack, 0x33, 8192);
/* Keep the array and the memset from being optimized away. */
use(g_stack);
}
/*
 * Counts how many leading bytes of the region painted by set_stack() still
 * hold 0x33, reports the rest as the maximum stack usage and requires it to
 * be at most 2048 + 512 bytes.
 * NOTE(review): presumably relies on a downward-growing stack, so that the
 * untouched bytes are the ones at the lowest addresses - confirm.
 */
static void __attribute__((noinline)) check_stack() {
size_t cleanStack = 0;
while (cleanStack < 8192 && g_stack[cleanStack] == 0x33) {
++cleanStack;
}
size_t const stackSize = 8192 - cleanStack;
fprintf(stderr, "Maximum stack size: %zu\n", stackSize);
CONTROL(stackSize <= 2048 + 512);
}
/*
 * Measures the stack consumed by the three functional tests: paint the stack,
 * run the tests, then inspect how much of the painted region was overwritten.
 */
static void test_stack_usage(test_data_t const *data) {
set_stack();
test_f2fs();
test_btrfs(data);
test_decompress_unzstd(data);
check_stack();
}
/*
 * Runs each functional test once directly, then once more under the stack
 * usage probe, and finally releases the shared buffers.
 */
int main(void) {
test_data_t data = create_test_data();
test_f2fs();
test_btrfs(&data);
test_decompress_unzstd(&data);
test_stack_usage(&data);
free_test_data(&data);
return 0;
}

View file

@ -0,0 +1,124 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/string.h>
#include <linux/zstd.h>
#include "common/zstd_deps.h"
#include "common/zstd_internal.h"
/* Returns the result of ZSTD_minCLevel(). */
int zstd_min_clevel(void)
{
return ZSTD_minCLevel();
}
EXPORT_SYMBOL(zstd_min_clevel);
/* Returns the result of ZSTD_maxCLevel(). */
int zstd_max_clevel(void)
{
return ZSTD_maxCLevel();
}
EXPORT_SYMBOL(zstd_max_clevel);
/* Forwards src_size to ZSTD_compressBound() and returns its result. */
size_t zstd_compress_bound(size_t src_size)
{
return ZSTD_compressBound(src_size);
}
EXPORT_SYMBOL(zstd_compress_bound);
/*
 * Returns the parameters ZSTD_getParams() selects for the given level and
 * estimated source size; the third argument (dictionary size) is always 0.
 */
zstd_parameters zstd_get_params(int level,
unsigned long long estimated_src_size)
{
return ZSTD_getParams(level, estimated_src_size, 0);
}
EXPORT_SYMBOL(zstd_get_params);
/*
 * Returns ZSTD_estimateCCtxSize_usingCParams() for *cparams.
 * cparams is dereferenced unconditionally and must not be NULL.
 */
size_t zstd_cctx_workspace_bound(const zstd_compression_parameters *cparams)
{
return ZSTD_estimateCCtxSize_usingCParams(*cparams);
}
EXPORT_SYMBOL(zstd_cctx_workspace_bound);
/*
 * Creates a compression context inside the caller-provided workspace.
 * Returns NULL when workspace is NULL, otherwise whatever
 * ZSTD_initStaticCCtx() returns.
 */
zstd_cctx *zstd_init_cctx(void *workspace, size_t workspace_size)
{
	return workspace != NULL
		? ZSTD_initStaticCCtx(workspace, workspace_size)
		: NULL;
}
EXPORT_SYMBOL(zstd_init_cctx);
/*
 * Compresses src into dst through ZSTD_compress_advanced() with no dictionary
 * (NULL, 0) and the given parameters, returning its result.
 * parameters is dereferenced unconditionally and must not be NULL.
 */
size_t zstd_compress_cctx(zstd_cctx *cctx, void *dst, size_t dst_capacity,
const void *src, size_t src_size, const zstd_parameters *parameters)
{
return ZSTD_compress_advanced(cctx, dst, dst_capacity, src, src_size, NULL, 0, *parameters);
}
EXPORT_SYMBOL(zstd_compress_cctx);
/*
 * Returns ZSTD_estimateCStreamSize_usingCParams() for *cparams.
 * cparams is dereferenced unconditionally and must not be NULL.
 */
size_t zstd_cstream_workspace_bound(const zstd_compression_parameters *cparams)
{
return ZSTD_estimateCStreamSize_usingCParams(*cparams);
}
EXPORT_SYMBOL(zstd_cstream_workspace_bound);
/*
 * Creates a compression stream inside the caller-provided workspace and
 * initializes it with *parameters and no dictionary.
 *
 * Returns NULL when workspace is NULL, when the static stream cannot be
 * created, or when ZSTD_initCStream_advanced() reports an error.
 */
zstd_cstream *zstd_init_cstream(const zstd_parameters *parameters,
	unsigned long long pledged_src_size, void *workspace, size_t workspace_size)
{
	zstd_cstream *stream = NULL;
	unsigned long long content_size;
	size_t status;

	if (workspace != NULL)
		stream = ZSTD_initStaticCStream(workspace, workspace_size);
	if (stream == NULL)
		return NULL;

	/* 0 means unknown in linux zstd API but means 0 in new zstd API */
	content_size = pledged_src_size != 0 ? pledged_src_size
					     : ZSTD_CONTENTSIZE_UNKNOWN;

	status = ZSTD_initCStream_advanced(stream, NULL, 0, *parameters, content_size);
	return ZSTD_isError(status) ? NULL : stream;
}
EXPORT_SYMBOL(zstd_init_cstream);
/*
 * Resets the compression stream for a new frame and returns the result of
 * ZSTD_resetCStream().
 *
 * A pledged_src_size of 0 is translated to ZSTD_CONTENTSIZE_UNKNOWN, exactly
 * as zstd_init_cstream() does, so that "0 means unknown" holds for this
 * wrapper regardless of how the underlying library interprets a literal 0.
 */
size_t zstd_reset_cstream(zstd_cstream *cstream,
	unsigned long long pledged_src_size)
{
	/* 0 means unknown in linux zstd API but means 0 in new zstd API */
	if (pledged_src_size == 0)
		pledged_src_size = ZSTD_CONTENTSIZE_UNKNOWN;
	return ZSTD_resetCStream(cstream, pledged_src_size);
}
EXPORT_SYMBOL(zstd_reset_cstream);
/* Forwards to ZSTD_compressStream() with the same stream and buffers and returns its result. */
size_t zstd_compress_stream(zstd_cstream *cstream, zstd_out_buffer *output,
zstd_in_buffer *input)
{
return ZSTD_compressStream(cstream, output, input);
}
EXPORT_SYMBOL(zstd_compress_stream);
/* Forwards to ZSTD_flushStream() and returns its result. */
size_t zstd_flush_stream(zstd_cstream *cstream, zstd_out_buffer *output)
{
return ZSTD_flushStream(cstream, output);
}
EXPORT_SYMBOL(zstd_flush_stream);
/* Forwards to ZSTD_endStream() and returns its result. */
size_t zstd_end_stream(zstd_cstream *cstream, zstd_out_buffer *output)
{
return ZSTD_endStream(cstream, output);
}
EXPORT_SYMBOL(zstd_end_stream);
MODULE_LICENSE("Dual BSD/GPL");
MODULE_DESCRIPTION("Zstd Compressor");

View file

@ -0,0 +1,105 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/string.h>
#include <linux/zstd.h>
#include "common/zstd_deps.h"
/* Common symbols. zstd_compress must depend on zstd_decompress. */
/*
 * zstd_is_error() - forward to ZSTD_isError().
 * @code: function result to test.
 *
 * Return: whatever ZSTD_isError() returns for @code.
 */
unsigned int zstd_is_error(size_t code)
{
	const unsigned int is_err = ZSTD_isError(code);

	return is_err;
}
EXPORT_SYMBOL(zstd_is_error);
/*
 * zstd_get_error_code() - forward to ZSTD_getErrorCode().
 * @code: function result to translate.
 *
 * Return: whatever ZSTD_getErrorCode() returns for @code.
 */
zstd_error_code zstd_get_error_code(size_t code)
{
	const zstd_error_code err = ZSTD_getErrorCode(code);

	return err;
}
EXPORT_SYMBOL(zstd_get_error_code);
/*
 * zstd_get_error_name() - forward to ZSTD_getErrorName().
 * @code: function result to describe.
 *
 * Return: the string returned by ZSTD_getErrorName() for @code.
 */
const char *zstd_get_error_name(size_t code)
{
	const char *name = ZSTD_getErrorName(code);

	return name;
}
EXPORT_SYMBOL(zstd_get_error_name);
/* Decompression symbols. */
/*
 * zstd_dctx_workspace_bound() - forward to ZSTD_estimateDCtxSize().
 *
 * Return: whatever ZSTD_estimateDCtxSize() returns.
 */
size_t zstd_dctx_workspace_bound(void)
{
	const size_t bound = ZSTD_estimateDCtxSize();

	return bound;
}
EXPORT_SYMBOL(zstd_dctx_workspace_bound);
/*
 * zstd_init_dctx() - create a decompression context in caller memory.
 * @workspace:      memory handed to ZSTD_initStaticDCtx().
 * @workspace_size: size of @workspace, forwarded alongside it.
 *
 * Return: NULL when @workspace is NULL, otherwise whatever
 * ZSTD_initStaticDCtx() returns.
 */
zstd_dctx *zstd_init_dctx(void *workspace, size_t workspace_size)
{
	return workspace ? ZSTD_initStaticDCtx(workspace, workspace_size) : NULL;
}
EXPORT_SYMBOL(zstd_init_dctx);
/*
 * zstd_decompress_dctx() - forward to ZSTD_decompressDCtx().
 * @dctx:         decompression context.
 * @dst:          destination buffer.
 * @dst_capacity: capacity of @dst, passed through.
 * @src:          source buffer.
 * @src_size:     size of @src, passed through.
 *
 * Return: whatever ZSTD_decompressDCtx() returns.
 */
size_t zstd_decompress_dctx(zstd_dctx *dctx, void *dst, size_t dst_capacity,
	const void *src, size_t src_size)
{
	const size_t ret = ZSTD_decompressDCtx(dctx, dst, dst_capacity,
					       src, src_size);

	return ret;
}
EXPORT_SYMBOL(zstd_decompress_dctx);
/*
 * zstd_dstream_workspace_bound() - forward to ZSTD_estimateDStreamSize().
 * @max_window_size: passed through unchanged.
 *
 * Return: whatever ZSTD_estimateDStreamSize() returns.
 */
size_t zstd_dstream_workspace_bound(size_t max_window_size)
{
	const size_t bound = ZSTD_estimateDStreamSize(max_window_size);

	return bound;
}
EXPORT_SYMBOL(zstd_dstream_workspace_bound);
/*
 * zstd_init_dstream() - create a decompression stream in caller memory.
 * @max_window_size: not used by this wrapper.
 * @workspace:       memory handed to ZSTD_initStaticDStream().
 * @workspace_size:  size of @workspace, forwarded alongside it.
 *
 * Return: NULL when @workspace is NULL, otherwise whatever
 * ZSTD_initStaticDStream() returns.
 */
zstd_dstream *zstd_init_dstream(size_t max_window_size, void *workspace,
	size_t workspace_size)
{
	/* Explicitly mark the parameter as intentionally unused. */
	(void)max_window_size;

	return workspace ? ZSTD_initStaticDStream(workspace, workspace_size) : NULL;
}
EXPORT_SYMBOL(zstd_init_dstream);
/*
 * zstd_reset_dstream() - forward to ZSTD_resetDStream().
 * @dstream: stream to reset.
 *
 * Return: whatever ZSTD_resetDStream() returns.
 */
size_t zstd_reset_dstream(zstd_dstream *dstream)
{
	const size_t ret = ZSTD_resetDStream(dstream);

	return ret;
}
EXPORT_SYMBOL(zstd_reset_dstream);
/*
 * zstd_decompress_stream() - forward to ZSTD_decompressStream().
 * @dstream: decompression stream.
 * @output:  output buffer descriptor, passed through.
 * @input:   input buffer descriptor, passed through.
 *
 * Return: whatever ZSTD_decompressStream() returns.
 */
size_t zstd_decompress_stream(zstd_dstream *dstream, zstd_out_buffer *output,
	zstd_in_buffer *input)
{
	const size_t ret = ZSTD_decompressStream(dstream, output, input);

	return ret;
}
EXPORT_SYMBOL(zstd_decompress_stream);
/*
 * zstd_find_frame_compressed_size() - forward to
 * ZSTD_findFrameCompressedSize().
 * @src:      source buffer.
 * @src_size: size of @src, passed through.
 *
 * Return: whatever ZSTD_findFrameCompressedSize() returns.
 */
size_t zstd_find_frame_compressed_size(const void *src, size_t src_size)
{
	const size_t ret = ZSTD_findFrameCompressedSize(src, src_size);

	return ret;
}
EXPORT_SYMBOL(zstd_find_frame_compressed_size);
/*
 * zstd_get_frame_header() - forward to ZSTD_getFrameHeader().
 * @header:   destination for the parsed header, passed through.
 * @src:      source buffer.
 * @src_size: size of @src, passed through.
 *
 * Return: whatever ZSTD_getFrameHeader() returns.
 */
size_t zstd_get_frame_header(zstd_frame_header *header, const void *src,
	size_t src_size)
{
	const size_t ret = ZSTD_getFrameHeader(header, src, src_size);

	return ret;
}
EXPORT_SYMBOL(zstd_get_frame_header);
MODULE_LICENSE("Dual BSD/GPL");
MODULE_DESCRIPTION("Zstd Decompressor");

View file

@ -0,0 +1,125 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
/*
* This file provides common libc dependencies that zstd requires.
* The purpose is to allow replacing this file with a custom implementation
* to compile zstd without libc support.
*/
/* Need:
* NULL
* INT_MAX
* UINT_MAX
* ZSTD_memcpy()
* ZSTD_memset()
* ZSTD_memmove()
*/
/* Always-on section; the guard makes repeated inclusion define it once. */
#ifndef ZSTD_DEPS_COMMON
#define ZSTD_DEPS_COMMON
#include <linux/limits.h>
#include <linux/stddef.h>
/* Map the zstd memory primitives onto the compiler builtins. */
#define ZSTD_memcpy(d,s,n) __builtin_memcpy((d),(s),(n))
#define ZSTD_memmove(d,s,n) __builtin_memmove((d),(s),(n))
#define ZSTD_memset(d,s,n) __builtin_memset((d),(s),(n))
#endif /* ZSTD_DEPS_COMMON */
/*
* Define malloc as always failing. That means the user must
* either use ZSTD_customMem or statically allocate memory.
* Need:
* ZSTD_malloc()
* ZSTD_free()
* ZSTD_calloc()
*/
/* Only defined when the includer requests it via ZSTD_DEPS_NEED_MALLOC. */
#ifdef ZSTD_DEPS_NEED_MALLOC
#ifndef ZSTD_DEPS_MALLOC
#define ZSTD_DEPS_MALLOC
/*
 * The allocators are statement expressions ({ ... }) that evaluate their
 * arguments, discard them, and yield NULL; ZSTD_free() only evaluates its
 * argument.
 */
#define ZSTD_malloc(s) ({ (void)(s); NULL; })
#define ZSTD_free(p) ((void)(p))
#define ZSTD_calloc(n,s) ({ (void)(n); (void)(s); NULL; })
#endif /* ZSTD_DEPS_MALLOC */
#endif /* ZSTD_DEPS_NEED_MALLOC */
/*
* Provides 64-bit math support.
* Need:
* U64 ZSTD_div64(U64 dividend, U32 divisor)
*/
/* Only defined when the includer requests it via ZSTD_DEPS_NEED_MATH64. */
#ifdef ZSTD_DEPS_NEED_MATH64
#ifndef ZSTD_DEPS_MATH64
#define ZSTD_DEPS_MATH64
#include <linux/math64.h>
/*
 * ZSTD_div64() forwards to div_u64().
 * NOTE(review): this is a plain `static` (not `static inline`) function in a
 * header, so each translation unit that requests MATH64 gets its own copy;
 * confirm that units which do not call it build without unused-function
 * warnings.
 */
static uint64_t ZSTD_div64(uint64_t dividend, uint32_t divisor) {
return div_u64(dividend, divisor);
}
#endif /* ZSTD_DEPS_MATH64 */
#endif /* ZSTD_DEPS_NEED_MATH64 */
/*
* This is only requested when DEBUGLEVEL >= 1, meaning
* it is disabled in production.
* Need:
* assert()
*/
/* Only defined when the includer requests it via ZSTD_DEPS_NEED_ASSERT. */
#ifdef ZSTD_DEPS_NEED_ASSERT
#ifndef ZSTD_DEPS_ASSERT
#define ZSTD_DEPS_ASSERT
#include <linux/kernel.h>
/*
 * assert(x) must complain when x is FALSE. WARN_ON() triggers when its
 * argument is true, so the asserted condition has to be negated; without
 * the negation every passing assertion would warn and every failing one
 * would be silent.
 */
#define assert(x) WARN_ON(!(x))
#endif /* ZSTD_DEPS_ASSERT */
#endif /* ZSTD_DEPS_NEED_ASSERT */
/*
* This is only requested when DEBUGLEVEL >= 2, meaning
* it is disabled in production.
* Need:
* ZSTD_DEBUG_PRINT()
*/
/* Only defined when the includer requests it via ZSTD_DEPS_NEED_IO. */
#ifdef ZSTD_DEPS_NEED_IO
#ifndef ZSTD_DEPS_IO
#define ZSTD_DEPS_IO
#include <linux/printk.h>
/* Debug output is routed to pr_debug() with the arguments unchanged. */
#define ZSTD_DEBUG_PRINT(...) pr_debug(__VA_ARGS__)
#endif /* ZSTD_DEPS_IO */
#endif /* ZSTD_DEPS_NEED_IO */
/*
* Only requested when MSAN is enabled.
* Need:
* intptr_t
*/
/* Only defined when the includer requests it via ZSTD_DEPS_NEED_STDINT. */
#ifdef ZSTD_DEPS_NEED_STDINT
#ifndef ZSTD_DEPS_STDINT
#define ZSTD_DEPS_STDINT
/*
 * The Linux Kernel doesn't provide intptr_t, only uintptr_t, which
 * is an unsigned long. The signed counterpart is therefore declared
 * here as a plain long.
 */
typedef long intptr_t;
#endif /* ZSTD_DEPS_STDINT */
#endif /* ZSTD_DEPS_NEED_STDINT */

View file

@ -0,0 +1,42 @@
## Edit Distance Match Finder
```
/* This match finder leverages techniques used in file comparison algorithms
* to find matches between a dictionary and a source file.
*
* The original motivation for studying this approach was to try and optimize
* Zstandard for the use case of patching: the most common scenario being
* updating an existing software package with the next version. When patching,
* the difference between the old version of the package and the new version
* is generally tiny (most of the new file will be identical to
* the old one). In more technical terms, the edit distance (the minimal number
* of changes required to take one sequence of bytes to another) between the
* files would be small relative to the size of the file.
*
* Various 'diffing' algorithms utilize this notion of edit distance and
* the corresponding concept of a minimal edit script between two
* sequences to identify the regions within two files where they differ.
* The core algorithm used in this match finder is described in:
*
* "An O(ND) Difference Algorithm and its Variations", Eugene W. Myers,
* Algorithmica Vol. 1, 1986, pp. 251-266,
* <https://doi.org/10.1007/BF01840446>.
*
* Additional algorithmic heuristics for speed improvement have also been included.
* These were inspired by implementations of various regular and binary diffing
* algorithms such as GNU diff, bsdiff, and Xdelta.
*
* Note: after some experimentation, this approach proved to not provide enough
* utility to justify the additional CPU used in finding matches. The one area
* where this approach consistently outperforms Zstandard even on level 19 is
* when compressing small files (<10 KB) using an equally small dictionary that
* is very similar to the source file. For the use case that this was intended for
* (large similar files), this approach by itself took 5-10X longer than zstd-19 and
* generally resulted in 2-3X larger files. The core advantage that zstd-19 has
* over this approach for match finding is the overlapping matches. This approach
* cannot find any.
*
* I'm leaving this in the contrib section in case this ever becomes interesting
* to explore again.
* */
```

View file

@ -0,0 +1,558 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
/*-*************************************
* Dependencies
***************************************/
/* Currently relies on qsort when combining contiguous matches. This can probably
* be avoided but would require changes to the algorithm. The qsort is far from
* the bottleneck in this algorithm even for medium-sized files, so it's probably
* not worth trying to address */
#include <stdlib.h>
#include <assert.h>
#include "zstd_edist.h"
#include "mem.h"
/*-*************************************
* Constants
***************************************/
/* Sentinel value for the entries of the backward diagonal array.
 * The whole expansion is parenthesized so the macro behaves as a single
 * operand wherever it is used. */
#define ZSTD_EDIST_DIAG_MAX ((S32)(1 << 30))
/* How large should a snake be to be considered a 'big' snake.
 * For an explanation of what a 'snake' is with respect to the
 * edit distance matrix, see the linked paper in zstd_edist.h */
#define ZSTD_EDIST_SNAKE_THRESH 20
/* After how many iterations should we start to use the heuristic
 * based on 'big' snakes */
#define ZSTD_EDIST_SNAKE_ITER_THRESH 200
/* After how many iterations should we just give up and take
 * the best available edit script for this round */
#define ZSTD_EDIST_EXPENSIVE_THRESH 1024
/*-*************************************
* Structures
***************************************/
/* One match between the dictionary and the source. Matches are first
 * recorded with matchLength == 1 and later merged when contiguous. */
typedef struct {
U32 dictIdx; /* Position of the first matching byte in the dictionary */
U32 srcIdx; /* Position of the first matching byte in the source */
U32 matchLength; /* Number of matching bytes starting at both positions */
} ZSTD_eDist_match;
/* Working state shared by all steps of one ZSTD_eDist_genSequences() call. */
typedef struct {
const BYTE* dict;
const BYTE* src;
size_t dictSize;
size_t srcSize;
S32* forwardDiag; /* Entries of the forward diagonal stored here */
S32* backwardDiag; /* Entries of the backward diagonal stored here.
* Note: this buffer and the 'forwardDiag' buffer
* are contiguous. See ZSTD_eDist_genSequences() */
ZSTD_eDist_match* matches; /* Accumulate matches of length 1 in this buffer.
* In a subsequent post-processing step, we combine
* contiguous matches. */
U32 nbMatches; /* Number of valid entries in 'matches' */
} ZSTD_eDist_state;
/* Split point produced by ZSTD_eDist_diag() and consumed by
 * ZSTD_eDist_compare() to recurse on the two halves. */
typedef struct {
S32 dictMid; /* Dictionary index at which the problem is split */
S32 srcMid; /* Source index at which the problem is split */
int lowUseHeuristics; /* Should we use heuristics for the low part */
int highUseHeuristics; /* Should we use heuristics for the high part */
} ZSTD_eDist_partition;
/*-*************************************
* Internal
***************************************/
/* ZSTD_eDist_diag() :
 * Finds a point (dictMid, srcMid) at which to split the sub-problem
 * dict[dictLow, dictHigh) vs src[srcLow, srcHigh) and stores it, together
 * with the heuristic flags for the two halves, in 'partition'.
 *
 * A diagonal is identified by diag = dictIdx - srcIdx. forwardDiag[diag]
 * holds the furthest dictionary index reached on that diagonal by the search
 * starting at (dictLow, srcLow); backwardDiag[diag] holds the lowest
 * dictionary index reached by the search starting at (dictHigh, srcHigh).
 * Diagonal numbers can be negative, so both arrays are indexed through
 * pointers that the caller has offset into the middle of their buffers.
 *
 * The function returns as soon as the two searches meet on a diagonal, or,
 * when 'useHeuristics' is non-zero, when one of the heuristics below picks a
 * split point early. 'partition' is always fully written before returning. */
static void ZSTD_eDist_diag(ZSTD_eDist_state* state,
ZSTD_eDist_partition* partition,
S32 dictLow, S32 dictHigh, S32 srcLow,
S32 srcHigh, int useHeuristics)
{
S32* const forwardDiag = state->forwardDiag;
S32* const backwardDiag = state->backwardDiag;
const BYTE* const dict = state->dict;
const BYTE* const src = state->src;
/* Smallest and largest diagonal that intersects the sub-problem */
S32 const diagMin = dictLow - srcHigh;
S32 const diagMax = dictHigh - srcLow;
/* Diagonals on which the forward and backward searches start */
S32 const forwardMid = dictLow - srcLow;
S32 const backwardMid = dictHigh - srcHigh;
S32 forwardMin = forwardMid;
S32 forwardMax = forwardMid;
S32 backwardMin = backwardMid;
S32 backwardMax = backwardMid;
/* Parity of the distance between the two starting diagonals: the overlap
 * test is done in the forward pass when odd, in the backward pass when even */
int odd = (forwardMid - backwardMid) & 1;
U32 iterations;
forwardDiag[forwardMid] = dictLow;
backwardDiag[backwardMid] = dictHigh;
/* Main loop for updating diag entries. When useHeuristics is
 * false, this loop will run until the two searches meet, i.e. until
 * it finds the minimal edit script; otherwise it may stop earlier */
for (iterations = 1;;iterations++) {
S32 diag;
int bigSnake = 0;
/* Widen the forward diagonal range by one on each side while inside
 * [diagMin, diagMax]; once a limit is reached the range shrinks on that
 * side instead. The neighbour just outside the range is seeded with -1 */
if (forwardMin > diagMin) {
forwardMin--;
forwardDiag[forwardMin - 1] = -1;
} else {
forwardMin++;
}
if (forwardMax < diagMax) {
forwardMax++;
forwardDiag[forwardMax + 1] = -1;
} else {
forwardMax--;
}
/* Forward pass: visit every other diagonal of the current range */
for (diag = forwardMax; diag >= forwardMin; diag -= 2) {
S32 dictIdx;
S32 srcIdx;
S32 low = forwardDiag[diag - 1];
S32 high = forwardDiag[diag + 1];
/* Start from whichever neighbouring diagonal got further; coming from
 * diag - 1 advances the dictionary index by one */
S32 dictIdx0 = low < high ? high : low + 1;
/* Follow the snake: advance while the bytes keep matching */
for (dictIdx = dictIdx0, srcIdx = dictIdx0 - diag;
dictIdx < dictHigh && srcIdx < srcHigh && dict[dictIdx] == src[srcIdx];
dictIdx++, srcIdx++) continue;
if (dictIdx - dictIdx0 > ZSTD_EDIST_SNAKE_THRESH)
bigSnake = 1;
forwardDiag[diag] = dictIdx;
/* The forward search has reached or passed the backward one on this diagonal */
if (odd && backwardMin <= diag && diag <= backwardMax && backwardDiag[diag] <= dictIdx) {
partition->dictMid = dictIdx;
partition->srcMid = srcIdx;
partition->lowUseHeuristics = 0;
partition->highUseHeuristics = 0;
return;
}
}
/* Same range update for the backward search; here the outside neighbour
 * is seeded with ZSTD_EDIST_DIAG_MAX */
if (backwardMin > diagMin) {
backwardMin--;
backwardDiag[backwardMin - 1] = ZSTD_EDIST_DIAG_MAX;
} else {
backwardMin++;
}
if (backwardMax < diagMax) {
backwardMax++;
backwardDiag[backwardMax + 1] = ZSTD_EDIST_DIAG_MAX;
} else {
backwardMax--;
}
/* Backward pass: mirror image of the forward pass, walking towards
 * (dictLow, srcLow) */
for (diag = backwardMax; diag >= backwardMin; diag -= 2) {
S32 dictIdx;
S32 srcIdx;
S32 low = backwardDiag[diag - 1];
S32 high = backwardDiag[diag + 1];
S32 dictIdx0 = low < high ? low : high - 1;
for (dictIdx = dictIdx0, srcIdx = dictIdx0 - diag;
dictLow < dictIdx && srcLow < srcIdx && dict[dictIdx - 1] == src[srcIdx - 1];
dictIdx--, srcIdx--) continue;
if (dictIdx0 - dictIdx > ZSTD_EDIST_SNAKE_THRESH)
bigSnake = 1;
backwardDiag[diag] = dictIdx;
if (!odd && forwardMin <= diag && diag <= forwardMax && dictIdx <= forwardDiag[diag]) {
partition->dictMid = dictIdx;
partition->srcMid = srcIdx;
partition->lowUseHeuristics = 0;
partition->highUseHeuristics = 0;
return;
}
}
if (!useHeuristics)
continue;
/* Everything under this point is a heuristic. Using these will
 * substantially speed up the match finding. In some cases, taking
 * the total match finding time from several minutes to seconds.
 * Of course, the caveat is that the edit script found may no longer
 * be optimal */
/* Big snake heuristic: only considered after more than
 * ZSTD_EDIST_SNAKE_ITER_THRESH iterations, and only if this iteration
 * produced a snake longer than ZSTD_EDIST_SNAKE_THRESH */
if (iterations > ZSTD_EDIST_SNAKE_ITER_THRESH && bigSnake) {
{
/* Look for the best-scoring forward diagonal whose end point is
 * preceded by ZSTD_EDIST_SNAKE_THRESH matching bytes */
S32 best = 0;
for (diag = forwardMax; diag >= forwardMin; diag -= 2) {
S32 diagDiag = diag - forwardMid;
S32 dictIdx = forwardDiag[diag];
S32 srcIdx = dictIdx - diag;
S32 v = (dictIdx - dictLow) * 2 - diagDiag;
if (v > 12 * (iterations + (diagDiag < 0 ? -diagDiag : diagDiag))) {
if (v > best
&& dictLow + ZSTD_EDIST_SNAKE_THRESH <= dictIdx && dictIdx <= dictHigh
&& srcLow + ZSTD_EDIST_SNAKE_THRESH <= srcIdx && srcIdx <= srcHigh) {
S32 k;
/* The range checks above keep dictIdx - k and srcIdx - k in bounds
 * for every k up to ZSTD_EDIST_SNAKE_THRESH */
for (k = 1; dict[dictIdx - k] == src[srcIdx - k]; k++) {
if (k == ZSTD_EDIST_SNAKE_THRESH) {
best = v;
partition->dictMid = dictIdx;
partition->srcMid = srcIdx;
break;
}
}
}
}
}
if (best > 0) {
/* Split found by the forward search: only the high half keeps heuristics */
partition->lowUseHeuristics = 0;
partition->highUseHeuristics = 1;
return;
}
}
{
/* Same search on the backward diagonals, checking the
 * ZSTD_EDIST_SNAKE_THRESH bytes that follow the end point */
S32 best = 0;
for (diag = backwardMax; diag >= backwardMin; diag -= 2) {
S32 diagDiag = diag - backwardMid;
S32 dictIdx = backwardDiag[diag];
S32 srcIdx = dictIdx - diag;
S32 v = (dictHigh - dictIdx) * 2 + diagDiag;
if (v > 12 * (iterations + (diagDiag < 0 ? -diagDiag : diagDiag))) {
if (v > best
&& dictLow < dictIdx && dictIdx <= dictHigh - ZSTD_EDIST_SNAKE_THRESH
&& srcLow < srcIdx && srcIdx <= srcHigh - ZSTD_EDIST_SNAKE_THRESH) {
int k;
for (k = 0; dict[dictIdx + k] == src[srcIdx + k]; k++) {
if (k == ZSTD_EDIST_SNAKE_THRESH - 1) {
best = v;
partition->dictMid = dictIdx;
partition->srcMid = srcIdx;
break;
}
}
}
}
}
if (best > 0) {
/* Split found by the backward search: only the low half keeps heuristics */
partition->lowUseHeuristics = 1;
partition->highUseHeuristics = 0;
return;
}
}
}
/* More general 'too expensive' heuristic: after
 * ZSTD_EDIST_EXPENSIVE_THRESH iterations, take the point of furthest
 * progress of whichever search advanced more */
if (iterations >= ZSTD_EDIST_EXPENSIVE_THRESH) {
S32 forwardDictSrcBest;
S32 forwardDictBest = 0;
S32 backwardDictSrcBest;
S32 backwardDictBest = 0;
/* Forward: largest dictIdx + srcIdx, with the point clamped to
 * (dictHigh, srcHigh) */
forwardDictSrcBest = -1;
for (diag = forwardMax; diag >= forwardMin; diag -= 2) {
S32 dictIdx = MIN(forwardDiag[diag], dictHigh);
S32 srcIdx = dictIdx - diag;
if (srcHigh < srcIdx) {
dictIdx = srcHigh + diag;
srcIdx = srcHigh;
}
if (forwardDictSrcBest < dictIdx + srcIdx) {
forwardDictSrcBest = dictIdx + srcIdx;
forwardDictBest = dictIdx;
}
}
/* Backward: smallest dictIdx + srcIdx, with the point clamped to
 * (dictLow, srcLow) */
backwardDictSrcBest = ZSTD_EDIST_DIAG_MAX;
for (diag = backwardMax; diag >= backwardMin; diag -= 2) {
S32 dictIdx = MAX(dictLow, backwardDiag[diag]);
S32 srcIdx = dictIdx - diag;
if (srcIdx < srcLow) {
dictIdx = srcLow + diag;
srcIdx = srcLow;
}
if (dictIdx + srcIdx < backwardDictSrcBest) {
backwardDictSrcBest = dictIdx + srcIdx;
backwardDictBest = dictIdx;
}
}
/* Compare the progress of the two searches and split at the better point */
if ((dictHigh + srcHigh) - backwardDictSrcBest < forwardDictSrcBest - (dictLow + srcLow)) {
partition->dictMid = forwardDictBest;
partition->srcMid = forwardDictSrcBest - forwardDictBest;
partition->lowUseHeuristics = 0;
partition->highUseHeuristics = 1;
} else {
partition->dictMid = backwardDictBest;
partition->srcMid = backwardDictSrcBest - backwardDictBest;
partition->lowUseHeuristics = 1;
partition->highUseHeuristics = 0;
}
return;
}
}
}
/* ZSTD_eDist_insertMatch() :
 * Appends a single-byte match (dictIdx, srcIdx, 1) to state->matches and
 * bumps state->nbMatches. No capacity check is performed here. */
static void ZSTD_eDist_insertMatch(ZSTD_eDist_state* state,
                    S32 const dictIdx, S32 const srcIdx)
{
    ZSTD_eDist_match* const slot = &state->matches[state->nbMatches];

    slot->dictIdx = (U32)dictIdx;
    slot->srcIdx = (U32)srcIdx;
    slot->matchLength = 1;
    state->nbMatches += 1;
}
/* ZSTD_eDist_compare() :
 * Records every single-byte match between dict[dictLow, dictHigh) and
 * src[srcLow, srcHigh) along the edit script found for that range.
 * The common prefix and suffix are consumed directly; whatever remains is
 * split with ZSTD_eDist_diag() and both halves are processed recursively.
 * @return : 1 if a recursive call returned non-zero, 0 otherwise. */
static int ZSTD_eDist_compare(ZSTD_eDist_state* state,
                    S32 dictLow, S32 dictHigh, S32 srcLow,
                    S32 srcHigh, int useHeuristics)
{
    const BYTE* const dictBytes = state->dict;
    const BYTE* const srcBytes = state->src;

    /* Consume the common prefix */
    for (; dictLow < dictHigh && srcLow < srcHigh
           && dictBytes[dictLow] == srcBytes[srcLow];
         dictLow++, srcLow++) {
        ZSTD_eDist_insertMatch(state, dictLow, srcLow);
    }

    /* Consume the common suffix */
    for (; dictLow < dictHigh && srcLow < srcHigh
           && dictBytes[dictHigh - 1] == srcBytes[srcHigh - 1];
         dictHigh--, srcHigh--) {
        ZSTD_eDist_insertMatch(state, dictHigh - 1, srcHigh - 1);
    }

    /* One side is exhausted: what is left of the other side is made of
     * pure insertions (dict exhausted) or pure deletions (src exhausted).
     * A diffing tool would record them here, and the edit distance could be
     * accumulated here; this match finder only cares about matches, so
     * there is nothing to do. */
    if (dictLow == dictHigh || srcLow == srcHigh)
        return 0;

    {   ZSTD_eDist_partition split;
        split.dictMid = 0;
        split.srcMid = 0;
        ZSTD_eDist_diag(state, &split, dictLow, dictHigh,
                        srcLow, srcHigh, useHeuristics);

        /* Low half first, then high half */
        if (ZSTD_eDist_compare(state, dictLow, split.dictMid,
                               srcLow, split.srcMid, split.lowUseHeuristics))
            return 1;
        return ZSTD_eDist_compare(state, split.dictMid, dictHigh,
                                  split.srcMid, srcHigh, split.highUseHeuristics) ? 1 : 0;
    }
}
static int ZSTD_eDist_matchComp(const void* p, const void* q)
{
S32 const l = ((ZSTD_eDist_match*)p)->srcIdx;
S32 const r = ((ZSTD_eDist_match*)q)->srcIdx;
return (l - r);
}
/* The matches from the approach above will all be of the form
 * (dictIdx, srcIdx, 1). This method sorts them by srcIdx and merges
 * contiguous ones; merged matches shorter than MINMATCH are discarded,
 * including the last one.
 *
 * On return state->matches holds the surviving matches and
 * state->nbMatches their count (possibly 0). If the temporary buffer
 * cannot be allocated, all matches are dropped (nbMatches = 0), since
 * un-merged length-1 matches are not usable. */
static void ZSTD_eDist_combineMatches(ZSTD_eDist_state* state)
{
    ZSTD_eDist_match* combinedMatches;
    U32 nbCombinedMatches = 1;
    size_t i;

    /* Nothing to combine; also avoids reading matches[0] when it was
     * never written */
    if (state->nbMatches == 0)
        return;

    /* Create a new buffer to put the combined matches into
     * and memcpy to state->matches after */
    combinedMatches =
        ZSTD_malloc(state->nbMatches * sizeof(ZSTD_eDist_match),
                    ZSTD_defaultCMem);
    if (combinedMatches == NULL) {
        state->nbMatches = 0;
        return;
    }

    /* Make sure that the srcIdx and dictIdx are in sorted order.
     * The combination step won't work otherwise */
    qsort(state->matches, state->nbMatches, sizeof(ZSTD_eDist_match), ZSTD_eDist_matchComp);

    combinedMatches[0] = state->matches[0];
    for (i = 1; i < state->nbMatches; i++) {
        ZSTD_eDist_match const match = state->matches[i];
        ZSTD_eDist_match* const last = &combinedMatches[nbCombinedMatches - 1];

        if (last->srcIdx + last->matchLength == match.srcIdx &&
            last->dictIdx + last->matchLength == match.dictIdx) {
            /* Contiguous in both buffers: extend the current match */
            last->matchLength++;
        } else {
            /* Discard matches that are less than MINMATCH by letting the
             * new match overwrite them */
            if (last->matchLength < MINMATCH) {
                nbCombinedMatches--;
            }
            combinedMatches[nbCombinedMatches] = match;
            nbCombinedMatches++;
        }
    }

    /* The loop only filters a match when a later one starts; apply the
     * same MINMATCH rule to the final match */
    if (combinedMatches[nbCombinedMatches - 1].matchLength < MINMATCH) {
        nbCombinedMatches--;
    }

    memcpy(state->matches, combinedMatches, nbCombinedMatches * sizeof(ZSTD_eDist_match));
    state->nbMatches = nbCombinedMatches;
    ZSTD_free(combinedMatches, ZSTD_defaultCMem);
}
/* ZSTD_eDist_convertMatchesToSequences() :
 * Writes one entry of 'sequences' per entry of state->matches, in order.
 * For each match:
 *   offset      = (srcIdx + dictSize) - dictIdx
 *   litLength   = srcIdx minus the end of the previous match
 *                 (or srcIdx itself for the first match)
 *   matchLength = the match's matchLength
 * Only these three fields are written.
 * @return : number of sequences written, which equals state->nbMatches */
static size_t ZSTD_eDist_convertMatchesToSequences(ZSTD_Sequence* sequences,
    ZSTD_eDist_state* state)
{
    const ZSTD_eDist_match* const matches = state->matches;
    size_t const nbMatches = state->nbMatches;
    size_t const dictSize = state->dictSize;
    U32 prevEnd = 0;   /* srcIdx + matchLength of the previous match */
    size_t idx;

    for (idx = 0; idx < nbMatches; idx++) {
        const ZSTD_eDist_match* const cur = &matches[idx];
        U32 const offset = (U32)((cur->srcIdx + dictSize) - cur->dictIdx);
        U32 const litLength = cur->srcIdx - prevEnd;

        sequences[idx].offset = offset;
        sequences[idx].litLength = litLength;
        sequences[idx].matchLength = cur->matchLength;
        prevEnd = cur->srcIdx + cur->matchLength;
    }
    return nbMatches;
}
/*-*************************************
* Internal utils
***************************************/
/* ZSTD_eDist_hamingDist() :
 * Counts the positions in [0, n) at which a and b hold different bytes.
 * @return : number of differing positions */
static size_t ZSTD_eDist_hamingDist(const BYTE* const a,
                        const BYTE* const b, size_t n)
{
    size_t mismatches = 0;
    size_t pos = 0;

    while (pos < n) {
        if (a[pos] != b[pos])
            mismatches++;
        pos++;
    }
    return mismatches;
}
/* Naive recursive edit distance between s[0, sn) and t[0, tn).
 * Every mismatch spawns three recursive calls, so this is only suitable
 * for quick tests on tiny inputs. Don't try to run this on a GB file or
 * something; there are faster implementations, use those for large files.
 * @return : sn or tn when the other length is 0; otherwise the distance of
 *           the shortened inputs when the last bytes match, or 1 plus the
 *           minimum of the three shortened sub-problems when they differ */
static size_t ZSTD_eDist_levenshteinDist(const BYTE* const s,
                        size_t const sn, const BYTE* const t,
                        size_t const tn)
{
    if (sn == 0)
        return tn;
    if (tn == 0)
        return sn;

    /* Equal last bytes cost nothing: drop them from both inputs */
    if (s[sn - 1] == t[tn - 1])
        return ZSTD_eDist_levenshteinDist(s, sn - 1, t, tn - 1);

    {   size_t const dropBoth = ZSTD_eDist_levenshteinDist(s, sn - 1, t, tn - 1);
        size_t const dropLastOfT = ZSTD_eDist_levenshteinDist(s, sn, t, tn - 1);
        size_t const dropLastOfS = ZSTD_eDist_levenshteinDist(s, sn - 1, t, tn);
        size_t cheapest = dropBoth;

        if (dropLastOfT < cheapest)
            cheapest = dropLastOfT;
        if (dropLastOfS < cheapest)
            cheapest = dropLastOfS;
        return cheapest + 1;
    }
}
/* ZSTD_eDist_validateMatches() :
 * Debug helper: asserts that every match lies inside both buffers and that
 * the bytes it covers are identical in dict and src.
 * A match covers [idx, idx + matchLength), so it is allowed to end exactly
 * at the end of a buffer (idx + matchLength == size). The sums are done in
 * size_t so they cannot wrap in 32 bits. */
static void ZSTD_eDist_validateMatches(ZSTD_eDist_match* matches,
                        size_t const nbMatches, const BYTE* const dict,
                        size_t const dictSize, const BYTE* const src,
                        size_t const srcSize)
{
    size_t i;
    for (i = 0; i < nbMatches; i++) {
        ZSTD_eDist_match const match = matches[i];
        size_t const dictIdx = match.dictIdx;
        size_t const srcIdx = match.srcIdx;
        size_t const matchLength = match.matchLength;
        assert(dictIdx + matchLength <= dictSize);
        assert(srcIdx + matchLength <= srcSize);
        assert(!memcmp(dict + dictIdx, src + srcIdx, matchLength));
        /* Keep the locals referenced when assert() compiles to nothing */
        (void)dictIdx; (void)srcIdx; (void)matchLength;
    }
    (void)dict; (void)dictSize; (void)src; (void)srcSize;
}
/*-*************************************
* API
***************************************/
/* ZSTD_eDist_genSequences() :
 * Computes an edit script between 'dict' and 'src', merges the resulting
 * single-byte matches and writes them to 'sequences'.
 *
 * 'useHeuristics' is forwarded to the top-level comparison: non-zero allows
 * the early-exit heuristics of ZSTD_eDist_diag(), zero disables them.
 *
 * @return : number of sequences written. 0 is returned when either input is
 *           empty, when an input is too large for the S32 indices used
 *           internally, when a work buffer cannot be allocated, or when no
 *           match was found. */
size_t ZSTD_eDist_genSequences(ZSTD_Sequence* sequences,
                        const void* dict, size_t dictSize,
                        const void* src, size_t srcSize,
                        int useHeuristics)
{
    size_t const nbDiags = dictSize + srcSize + 3;
    S32* buffer = NULL;
    ZSTD_eDist_state state;
    size_t nbSequences = 0;

    /* No match can exist if one side is empty */
    if (dictSize == 0 || srcSize == 0)
        return 0;
    /* Indices are handled as S32 and must stay below the sentinel stored
     * in the backward diagonal array */
    if (dictSize >= (size_t)ZSTD_EDIST_DIAG_MAX || srcSize >= (size_t)ZSTD_EDIST_DIAG_MAX)
        return 0;

    state.dict = (const BYTE*)dict;
    state.src = (const BYTE*)src;
    state.dictSize = dictSize;
    state.srcSize = srcSize;
    state.nbMatches = 0;
    state.matches = ZSTD_malloc(srcSize * sizeof(ZSTD_eDist_match), ZSTD_defaultCMem);
    buffer = ZSTD_malloc(nbDiags * 2 * sizeof(S32), ZSTD_defaultCMem);

    if (buffer != NULL && state.matches != NULL) {
        /* Both diagonal arrays live in 'buffer', back to back. Diagonal
         * numbers can be negative, so each pointer is moved srcSize + 1
         * entries into its half */
        state.forwardDiag = buffer + (srcSize + 1);
        state.backwardDiag = buffer + nbDiags + (srcSize + 1);

        ZSTD_eDist_compare(&state, 0, (S32)dictSize, 0, (S32)srcSize, useHeuristics);

        /* Post-processing assumes at least one recorded match */
        if (state.nbMatches > 0) {
            ZSTD_eDist_combineMatches(&state);
            nbSequences = ZSTD_eDist_convertMatchesToSequences(sequences, &state);
        }
    }

    if (buffer != NULL)
        ZSTD_free(buffer, ZSTD_defaultCMem);
    if (state.matches != NULL)
        ZSTD_free(state.matches, ZSTD_defaultCMem);
    return nbSequences;
}

View file

@ -0,0 +1,70 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
/* This match finder leverages techniques used in file comparison algorithms
* to find matches between a dictionary and a source file.
*
* The original motivation for studying this approach was to try and optimize
* Zstandard for the use case of patching: the most common scenario being
* updating an existing software package with the next version. When patching,
* the difference between the old version of the package and the new version
* is generally tiny (most of the new file will be identical to
* the old one). In more technical terms, the edit distance (the minimal number
* of changes required to take one sequence of bytes to another) between the
* files would be small relative to the size of the file.
*
* Various 'diffing' algorithms utilize this notion of edit distance and
* the corresponding concept of a minimal edit script between two
* sequences to identify the regions within two files where they differ.
* The core algorithm used in this match finder is described in:
*
* "An O(ND) Difference Algorithm and its Variations", Eugene W. Myers,
* Algorithmica Vol. 1, 1986, pp. 251-266,
* <https://doi.org/10.1007/BF01840446>.
*
* Additional algorithmic heuristics for speed improvement have also been included.
* These were inspired by implementations of various regular and binary diffing
* algorithms such as GNU diff, bsdiff, and Xdelta.
*
* Note: after some experimentation, this approach proved to not provide enough
* utility to justify the additional CPU used in finding matches. The one area
* where this approach consistently outperforms Zstandard even on level 19 is
* when compressing small files (<10 KB) using an equally small dictionary that
* is very similar to the source file. For the use case that this was intended for
* (large similar files), this approach by itself took 5-10X longer than zstd-19 and
* generally resulted in 2-3X larger files. The core advantage that zstd-19 has
* over this approach for match finding is the overlapping matches. This approach
* cannot find any.
*
* I'm leaving this in the contrib section in case this ever becomes interesting
* to explore again.
* */
#ifndef ZSTD_EDIST_H
#define ZSTD_EDIST_H
/*-*************************************
* Dependencies
***************************************/
#include <stddef.h>
#include "zstd_internal.h" /* ZSTD_Sequence */
/*! ZSTD_eDist_genSequences() :
* Will populate the provided ZSTD_Sequence buffer with sequences
* based on the optimal or near-optimal (depending on 'useHeuristics')
* edit script between 'dict' and 'src'.
* 'sequences' : output buffer. Its capacity is not passed to this function.
*               NOTE(review): presumably it must be able to hold one entry per
*               match found (at most 'srcSize') -- confirm against callers.
* 'dict', 'dictSize' : dictionary bytes and their count.
* 'src', 'srcSize' : source bytes and their count.
* @return : the number of sequences found */
size_t ZSTD_eDist_genSequences(ZSTD_Sequence* sequences,
const void* dict, size_t dictSize,
const void* src, size_t srcSize,
int useHeuristics);
#endif

View file

@ -0,0 +1,6 @@
-- Include zstd.lua in your GENie or premake4 file, which exposes a project_zstd function
dofile('zstd.lua')
-- Minimal example: one solution with Debug and Release configurations
solution 'example'
configurations { 'Debug', 'Release' }
-- Path to the zstd 'lib' directory. Keep the trailing slash: project_zstd
-- builds file paths by plain concatenation (dir .. 'zstd.h').
project_zstd('../../lib/')

View file

@ -0,0 +1,80 @@
-- This GENie/premake file copies the behavior of the Makefile in the lib folder.
-- Basic usage: project_zstd(ZSTD_DIR)
--
-- Parameters:
--   dir           path prefix of the zstd lib directory; it is concatenated
--                 directly with sub-paths, so it must end with a separator
--   compression   include compress/ sources (default true)
--   decompression include decompress/ sources (default true)
--   deprecated    include deprecated/ sources (default false)
--   dictbuilder   include dictBuilder/ sources (default false)
--   legacy        value used for ZSTD_LEGACY_SUPPORT (default 0)
function project_zstd(dir, compression, decompression, deprecated, dictbuilder, legacy)
-- Apply defaults for omitted (nil) arguments
if compression == nil then compression = true end
if decompression == nil then decompression = true end
if deprecated == nil then deprecated = false end
if dictbuilder == nil then dictbuilder = false end
if legacy == nil then legacy = 0 end
-- Without compression, dictbuilder and deprecated are forced off
if not compression then
dictbuilder = false
deprecated = false
end
-- Without decompression, legacy support and deprecated are forced off
if not decompression then
legacy = 0
deprecated = false
end
project 'zstd'
kind 'StaticLib'
language 'C'
-- Always built: public header and the common sources
files {
dir .. 'zstd.h',
dir .. 'common/**.c',
dir .. 'common/**.h'
}
if compression then
files {
dir .. 'compress/**.c',
dir .. 'compress/**.h'
}
end
if decompression then
files {
dir .. 'decompress/**.c',
dir .. 'decompress/**.h'
}
end
if dictbuilder then
files {
dir .. 'dictBuilder/**.c',
dir .. 'dictBuilder/**.h'
}
end
if deprecated then
files {
dir .. 'deprecated/**.c',
dir .. 'deprecated/**.h'
}
end
if legacy ~= 0 then
-- NOTE(review): legacy sources are only added when legacy >= 8, and then
-- only the single pattern 'legacy/zstd_v0<legacy - 7>.*'. Confirm this
-- matches the legacy file selection done by the lib Makefile.
if legacy >= 8 then
files {
dir .. 'legacy/zstd_v0' .. (legacy - 7) .. '.*'
}
end
includedirs {
dir .. 'legacy'
}
end
includedirs {
dir,
dir .. 'common'
}
defines {
'XXH_NAMESPACE=ZSTD_',
'ZSTD_LEGACY_SUPPORT=' .. legacy
}
end

View file

@ -0,0 +1,2 @@
# compilation result
pzstd

View file

@ -0,0 +1,72 @@
# Core pzstd library: built from Pzstd.cpp and SkippableFrame.cpp.
# ErrorHolder.h, Logging.h and Pzstd.h are exported to dependents;
# SkippableFrame.h stays private to this target.
cxx_library(
name='libpzstd',
visibility=['PUBLIC'],
header_namespace='',
exported_headers=[
'ErrorHolder.h',
'Logging.h',
'Pzstd.h',
],
headers=[
'SkippableFrame.h',
],
srcs=[
'Pzstd.cpp',
'SkippableFrame.cpp',
],
deps=[
':options',
'//contrib/pzstd/utils:utils',
'//lib:mem',
'//lib:zstd',
],
)
# Options library: Options.cpp with Options.h exported to dependents.
cxx_library(
name='options',
visibility=['PUBLIC'],
header_namespace='',
exported_headers=['Options.h'],
srcs=['Options.cpp'],
deps=[
'//contrib/pzstd/utils:scope_guard',
'//lib:zstd',
'//programs:util',
],
)
# The pzstd executable: main.cpp linked against the two libraries above.
cxx_binary(
name='pzstd',
visibility=['PUBLIC'],
srcs=['main.cpp'],
deps=[
':libpzstd',
':options',
],
)
# googletest/googlemock built from the sources under googletest/.
# Must run "make googletest" first
cxx_library(
name='gtest',
# Amalgamated gtest/gmock sources plus gmock's main()
srcs=glob([
'googletest/googletest/src/gtest-all.cc',
'googletest/googlemock/src/gmock-all.cc',
'googletest/googlemock/src/gmock_main.cc',
]),
header_namespace='',
exported_headers=subdir_glob([
('googletest/googletest/include', '**/*.h'),
('googletest/googlemock/include', '**/*.h'),
]),
# The src/*.cc and src/*.h files are listed as private headers of this target
headers=subdir_glob([
('googletest/googletest', 'src/*.cc'),
('googletest/googletest', 'src/*.h'),
('googletest/googlemock', 'src/*.cc'),
('googletest/googlemock', 'src/*.h'),
]),
# No extra linker flags for the 'android' pattern; '-lpthread' for the '' pattern
platform_linker_flags=[
('android', []),
('', ['-lpthread']),
],
visibility=['PUBLIC'],
)

View file

@ -0,0 +1,54 @@
/*
* Copyright (c) 2016-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
*/
#pragma once
#include <atomic>
#include <cassert>
#include <stdexcept>
#include <string>
namespace pzstd {

// Coordinates graceful shutdown of the pzstd pipeline.
//
// Holds at most one error message. The first setError() call wins; later
// calls are ignored until getError() clears the flag. The destructor asserts
// that no error is still pending, so a recorded error must be consumed with
// getError() before the holder is destroyed.
//
// NOTE(review): message_ is written after the flag has been set and is read
// by getError() without further synchronization -- confirm callers only call
// getError() once the threads that may call setError() have finished.
class ErrorHolder {
  std::atomic<bool> error_;
  std::string message_;

 public:
  ErrorHolder() : error_(false) {}

  // Returns true while an error is recorded. Const-qualified so it can be
  // used through const references.
  bool hasError() const noexcept {
    return error_.load();
  }

  // Records `message` as the error if none is recorded yet.
  void setError(std::string message) noexcept {
    // Given multiple possibly concurrent calls, exactly one will ever succeed.
    bool expected = false;
    if (error_.compare_exchange_strong(expected, true)) {
      message_ = std::move(message);
    }
  }

  // Records `message` when `predicate` is false.
  // Returns true if no error is recorded afterwards (from this or any
  // earlier call), false otherwise.
  bool check(bool predicate, std::string message) noexcept {
    if (!predicate) {
      setError(std::move(message));
    }
    return !hasError();
  }

  // Clears the error flag and returns the stored message (moved out).
  std::string getError() noexcept {
    error_.store(false);
    return std::move(message_);
  }

  ~ErrorHolder() {
    assert(!hasError());
  }
};
}

View file

@ -0,0 +1,72 @@
/*
* Copyright (c) 2016-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
*/
#pragma once
#include <chrono>
#include <cstdio>
#include <mutex>
namespace pzstd {

// Log levels, from least to most verbose. A message is emitted when its
// level is <= the logger's level.
constexpr int kLogError = 1;
constexpr int kLogInfo = 2;
constexpr int kLogDebug = 3;
constexpr int kLogVerbose = 4;

// printf-style logger writing to a FILE*. Every write is done while holding
// an internal mutex.
class Logger {
  std::mutex mutex_;
  FILE* out_;
  const int level_;

  using Clock = std::chrono::system_clock;
  Clock::time_point lastUpdate_;        // time of the last update() that printed
  std::chrono::milliseconds refreshRate_;  // minimum delay between update() prints

 public:
  // `level` is the highest level that gets printed; `out` defaults to stderr.
  explicit Logger(int level, FILE* out = stderr)
      : out_(out), level_(level), lastUpdate_(Clock::now()),
        refreshRate_(150) {}

  // Returns true if a message at `level` would be printed. Const-qualified:
  // it only reads the immutable level_.
  bool logsAt(int level) const {
    return level <= level_;
  }

  // Prints a printf-style message if `level` is enabled.
  template <typename... Args>
  void operator()(int level, const char *fmt, Args... args) {
    if (level > level_) {
      return;
    }
    std::lock_guard<std::mutex> lock(mutex_);
    std::fprintf(out_, fmt, args...);
  }

  // Like operator(), but prefixed with '\r' and skipped unless more than
  // refreshRate_ has elapsed since the last update() that printed.
  template <typename... Args>
  void update(int level, const char *fmt, Args... args) {
    if (level > level_) {
      return;
    }
    std::lock_guard<std::mutex> lock(mutex_);
    auto now = Clock::now();
    if (now - lastUpdate_ > refreshRate_) {
      lastUpdate_ = now;
      std::fprintf(out_, "\r");
      std::fprintf(out_, fmt, args...);
    }
  }

  // Overwrites the current line with 79 spaces and returns to column 0,
  // if `level` is enabled.
  void clear(int level) {
    if (level > level_) {
      return;
    }
    std::lock_guard<std::mutex> lock(mutex_);
    std::fprintf(out_, "\r%79s\r", "");
  }
};
}

View file

@ -0,0 +1,274 @@
# ################################################################
# Copyright (c) 2016-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
# LICENSE file in the root directory of this source tree) and the GPLv2 (found
# in the COPYING file in the root directory of this source tree).
# ################################################################
# Standard variables for installation
# BINDIR is expanded immediately (:=) from DESTDIR and PREFIX, so both must be
# set on the command line or in the environment before this file is read.
DESTDIR ?=
PREFIX ?= /usr/local
BINDIR := $(DESTDIR)$(PREFIX)/bin
# Locations of the zstd library and command-line program sources, relative to
# this directory.
ZSTDDIR = ../../lib
PROGDIR = ../../programs
# External program to use to run tests, e.g. qemu or valgrind
TESTPROG ?=
# Flags to pass to the tests
TESTFLAGS ?=
# We use gcc/clang to generate the header dependencies of files
# The dependency file is first written to $*.Td and renamed to $*.d by
# POSTCOMPILE once the compile command has succeeded.
DEPFLAGS = -MMD -MP -MF $*.Td
POSTCOMPILE = mv -f $*.Td $*.d
# CFLAGS, CXXFLAGS, CPPFLAGS, and LDFLAGS are for the users to override
CFLAGS ?= -O3 -Wall -Wextra
CXXFLAGS ?= -O3 -Wall -Wextra -pedantic
CPPFLAGS ?=
LDFLAGS ?=
# PZstd uses legacy APIs
CFLAGS += -Wno-deprecated-declarations
# Include flags
PZSTD_INC = -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(PROGDIR) -I.
GTEST_INC = -isystem googletest/googletest/include
# PZSTD_* variables are internal; targets below append to them with
# target-specific assignments (e.g. -DNDEBUG, sanitizer flags).
PZSTD_CPPFLAGS = $(PZSTD_INC)
PZSTD_CCXXFLAGS =
PZSTD_CFLAGS = $(PZSTD_CCXXFLAGS)
PZSTD_CXXFLAGS = $(PZSTD_CCXXFLAGS) -std=c++11
PZSTD_LDFLAGS =
EXTRA_FLAGS =
# Final flag sets: user-overridable flags combined with the internal ones.
ALL_CFLAGS = $(EXTRA_FLAGS) $(CPPFLAGS) $(PZSTD_CPPFLAGS) $(CFLAGS) $(PZSTD_CFLAGS)
ALL_CXXFLAGS = $(EXTRA_FLAGS) $(CPPFLAGS) $(PZSTD_CPPFLAGS) $(CXXFLAGS) $(PZSTD_CXXFLAGS)
ALL_LDFLAGS = $(EXTRA_FLAGS) $(CXXFLAGS) $(LDFLAGS) $(PZSTD_LDFLAGS)
# gtest libraries need to go before "-lpthread" because they depend on it.
GTEST_LIB = -L googletest/build/googlemock/gtest
LIBS =
# Compilation commands
# LD_COMMAND links all prerequisites ($^) with the C++ driver.
LD_COMMAND = $(CXX) $^ $(ALL_LDFLAGS) $(LIBS) -pthread -o $@
CC_COMMAND = $(CC) $(DEPFLAGS) $(ALL_CFLAGS) -c $< -o $@
CXX_COMMAND = $(CXX) $(DEPFLAGS) $(ALL_CXXFLAGS) -c $< -o $@
# Get a list of all zstd files so we rebuild the static library when we need to
ZSTDCOMMON_FILES := $(wildcard $(ZSTDDIR)/common/*.c) \
$(wildcard $(ZSTDDIR)/common/*.h)
ZSTDCOMP_FILES := $(wildcard $(ZSTDDIR)/compress/*.c) \
$(wildcard $(ZSTDDIR)/compress/*.h)
ZSTDDECOMP_FILES := $(wildcard $(ZSTDDIR)/decompress/*.c) \
$(wildcard $(ZSTDDIR)/decompress/*.h)
ZSTDPROG_FILES := $(wildcard $(PROGDIR)/*.c) \
$(wildcard $(PROGDIR)/*.h)
# ZSTD_FILES is the prerequisite list of the libzstd.a rule below.
ZSTD_FILES := $(wildcard $(ZSTDDIR)/*.h) \
$(ZSTDDECOMP_FILES) $(ZSTDCOMMON_FILES) $(ZSTDCOMP_FILES) \
$(ZSTDPROG_FILES)
# List all the pzstd source files so we can determine their dependencies
PZSTD_SRCS := $(wildcard *.cpp)
PZSTD_TESTS := $(wildcard test/*.cpp)
UTILS_TESTS := $(wildcard utils/test/*.cpp)
ALL_SRCS := $(PZSTD_SRCS) $(PZSTD_TESTS) $(UTILS_TESTS)
# Define *.exe as extension for Windows systems
# (detected by the OS variable starting with "Windows")
ifneq (,$(filter Windows%,$(OS)))
EXT =.exe
else
EXT =
endif
# Standard targets
.PHONY: default
default: all
# The test-pzstd* targets clean, build and then run `check`. Most of them pass
# a gtest filter that excludes tests matching *ExtremelyLarge*.
.PHONY: test-pzstd
test-pzstd: TESTFLAGS=--gtest_filter=-*ExtremelyLarge*
test-pzstd: clean googletest pzstd tests check
.PHONY: test-pzstd32
test-pzstd32: clean googletest32 all32 check
.PHONY: test-pzstd-tsan
test-pzstd-tsan: LDFLAGS=-fuse-ld=gold
test-pzstd-tsan: TESTFLAGS=--gtest_filter=-*ExtremelyLarge*
test-pzstd-tsan: clean googletest tsan check
.PHONY: test-pzstd-asan
test-pzstd-asan: LDFLAGS=-fuse-ld=gold
test-pzstd-asan: TESTFLAGS=--gtest_filter=-*ExtremelyLarge*
test-pzstd-asan: clean asan check
# Runs each unit-test binary, optionally under $(TESTPROG). This target has no
# prerequisites, so the binaries must already have been built.
.PHONY: check
check:
$(TESTPROG) ./utils/test/BufferTest$(EXT) $(TESTFLAGS)
$(TESTPROG) ./utils/test/RangeTest$(EXT) $(TESTFLAGS)
$(TESTPROG) ./utils/test/ResourcePoolTest$(EXT) $(TESTFLAGS)
$(TESTPROG) ./utils/test/ScopeGuardTest$(EXT) $(TESTFLAGS)
$(TESTPROG) ./utils/test/ThreadPoolTest$(EXT) $(TESTFLAGS)
$(TESTPROG) ./utils/test/WorkQueueTest$(EXT) $(TESTFLAGS)
$(TESTPROG) ./test/OptionsTest$(EXT) $(TESTFLAGS)
$(TESTPROG) ./test/PzstdTest$(EXT) $(TESTFLAGS)
# Builds pzstd with -DNDEBUG and copies it into $(BINDIR).
.PHONY: install
install: PZSTD_CPPFLAGS += -DNDEBUG
install: pzstd$(EXT)
install -d -m 755 $(BINDIR)/
install -m 755 pzstd$(EXT) $(BINDIR)/pzstd$(EXT)
.PHONY: uninstall
uninstall:
$(RM) $(BINDIR)/pzstd$(EXT)
# Targets for many different builds
# Each variant appends flags with target-specific variable assignments and
# then depends on a more basic target.
.PHONY: all
all: PZSTD_CPPFLAGS += -DNDEBUG
all: pzstd$(EXT)
.PHONY: debug
debug: EXTRA_FLAGS += -g
debug: pzstd$(EXT) tests roundtrip
.PHONY: tsan
tsan: PZSTD_CCXXFLAGS += -fsanitize=thread -fPIC
tsan: PZSTD_LDFLAGS += -fsanitize=thread
tsan: debug
.PHONY: asan
asan: EXTRA_FLAGS += -fsanitize=address
asan: debug
.PHONY: ubsan
ubsan: EXTRA_FLAGS += -fsanitize=undefined
ubsan: debug
# 32-bit variants: same targets with -m32 added.
.PHONY: all32
all32: EXTRA_FLAGS += -m32
all32: all tests roundtrip
.PHONY: debug32
debug32: EXTRA_FLAGS += -m32
debug32: debug
.PHONY: asan32
asan32: EXTRA_FLAGS += -m32
asan32: asan
.PHONY: tsan32
tsan32: EXTRA_FLAGS += -m32
tsan32: tsan
.PHONY: ubsan32
ubsan32: EXTRA_FLAGS += -m32
ubsan32: ubsan
# Run long round trip tests
.PHONY: roundtripcheck
roundtripcheck: roundtrip check
$(TESTPROG) ./test/RoundTripTest$(EXT) $(TESTFLAGS)
# Build the main binary
pzstd$(EXT): main.o $(PROGDIR)/util.o Options.o Pzstd.o SkippableFrame.o $(ZSTDDIR)/libzstd.a
$(LD_COMMAND)
# Target that depends on all the tests
.PHONY: tests
tests: EXTRA_FLAGS += -Wno-deprecated-declarations
tests: $(patsubst %,%$(EXT),$(basename $(PZSTD_TESTS) $(UTILS_TESTS)))
# Build the round trip tests
.PHONY: roundtrip
roundtrip: EXTRA_FLAGS += -Wno-deprecated-declarations
roundtrip: test/RoundTripTest$(EXT)
# Use the static library that zstd builds for simplicity and
# so we get the compiler options correct
$(ZSTDDIR)/libzstd.a: $(ZSTD_FILES)
CFLAGS="$(ALL_CFLAGS)" LDFLAGS="$(ALL_LDFLAGS)" $(MAKE) -C $(ZSTDDIR) libzstd.a
# Rules to build the tests
# RoundTripTest has an explicit rule that links without the gtest libraries;
# the pattern rules below add them for the other test binaries.
test/RoundTripTest$(EXT): test/RoundTripTest.o $(PROGDIR)/datagen.o \
$(PROGDIR)/util.o Options.o \
Pzstd.o SkippableFrame.o $(ZSTDDIR)/libzstd.a
$(LD_COMMAND)
test/%Test$(EXT): PZSTD_LDFLAGS += $(GTEST_LIB)
test/%Test$(EXT): LIBS += -lgtest -lgtest_main
test/%Test$(EXT): test/%Test.o $(PROGDIR)/datagen.o \
$(PROGDIR)/util.o Options.o Pzstd.o \
SkippableFrame.o $(ZSTDDIR)/libzstd.a
$(LD_COMMAND)
# The utils tests link only their own object file plus gtest.
utils/test/%Test$(EXT): PZSTD_LDFLAGS += $(GTEST_LIB)
utils/test/%Test$(EXT): LIBS += -lgtest -lgtest_main
utils/test/%Test$(EXT): utils/test/%Test.o
$(LD_COMMAND)
# Extra arguments passed to cmake when configuring googletest.
GTEST_CMAKEFLAGS =
# Install googletest
# Removes any existing googletest directory, clones it from GitHub and builds
# it with cmake in googletest/build using $(ALL_CXXFLAGS).
.PHONY: googletest
googletest: PZSTD_CCXXFLAGS += -fPIC
googletest:
@$(RM) -rf googletest
@git clone https://github.com/google/googletest
@mkdir -p googletest/build
@cd googletest/build && cmake $(GTEST_CMAKEFLAGS) -DCMAKE_CXX_FLAGS="$(ALL_CXXFLAGS)" .. && $(MAKE)
.PHONY: googletest32
googletest32: PZSTD_CCXXFLAGS += -m32
googletest32: googletest
.PHONY: googletest-mingw64
googletest-mingw64: GTEST_CMAKEFLAGS += -G "MSYS Makefiles"
googletest-mingw64: googletest
# Removes objects, binaries and dependency files from this directory, the test
# directories and $(PROGDIR), then runs `clean` in $(ZSTDDIR).
.PHONY: clean
clean:
$(RM) -f *.o pzstd$(EXT) *.Td *.d
$(RM) -f test/*.o test/*Test$(EXT) test/*.Td test/*.d
$(RM) -f utils/test/*.o utils/test/*Test$(EXT) utils/test/*.Td utils/test/*.d
$(RM) -f $(PROGDIR)/*.o $(PROGDIR)/*.Td $(PROGDIR)/*.d
$(MAKE) -C $(ZSTDDIR) clean
@echo Cleaning completed
# Cancel implicit rules
# (a pattern rule with no recipe removes the built-in rule for that pattern)
%.o: %.c
%.o: %.cpp
# Object file rules
# Every rule compiles with dependency generation and then renames the
# temporary dependency file via POSTCOMPILE.
%.o: %.c
$(CC_COMMAND)
$(POSTCOMPILE)
$(PROGDIR)/%.o: $(PROGDIR)/%.c
$(CC_COMMAND)
$(POSTCOMPILE)
%.o: %.cpp
$(CXX_COMMAND)
$(POSTCOMPILE)
# Test objects additionally get the googletest include path.
test/%.o: PZSTD_CPPFLAGS += $(GTEST_INC)
test/%.o: test/%.cpp
$(CXX_COMMAND)
$(POSTCOMPILE)
utils/test/%.o: PZSTD_CPPFLAGS += $(GTEST_INC)
utils/test/%.o: utils/test/%.cpp
$(CXX_COMMAND)
$(POSTCOMPILE)
# Dependency file stuff
.PRECIOUS: %.d test/%.d utils/test/%.d
# Include rules that specify header file dependencies
# (the leading '-' makes missing .d files non-fatal)
-include $(patsubst %,%.d,$(basename $(ALL_SRCS)))

View file

@ -0,0 +1,424 @@
/*
* Copyright (c) 2016-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
*/
#include "Options.h"
#include "util.h"
#include "utils/ScopeGuard.h"
#include <algorithm>
#include <cassert>
#include <cstdio>
#include <cstring>
#include <iterator>
#include <thread>
#include <vector>
namespace pzstd {
namespace {
/// Returns the default thread count: the compile-time PZSTD_NUM_THREADS
/// override when it is defined, otherwise
/// std::thread::hardware_concurrency().
unsigned defaultNumThreads() {
#ifdef PZSTD_NUM_THREADS
  const unsigned threads = PZSTD_NUM_THREADS;
#else
  const unsigned threads = std::thread::hardware_concurrency();
#endif
  return threads;
}
/**
 * Parses the run of decimal digits at `*arg` and advances `*arg` past it.
 *
 * @param arg  In/out pointer into a NUL-terminated string. On return it
 *             points at the first character that is not a digit.
 * @return     The parsed value, 0 if `*arg` does not start with a digit, or
 *             the largest `unsigned` value if the number does not fit.
 *             Saturating instead of wrapping keeps an oversized number from
 *             silently turning into a small, valid-looking one.
 */
unsigned parseUnsigned(const char **arg) {
  const unsigned kMax = static_cast<unsigned>(-1);
  unsigned result = 0;
  bool overflow = false;
  while (**arg >= '0' && **arg <= '9') {
    const unsigned digit = static_cast<unsigned>(**arg - '0');
    // result * 10 + digit fits exactly when result <= (kMax - digit) / 10.
    if (overflow || result > (kMax - digit) / 10) {
      overflow = true;
    } else {
      result = result * 10 + digit;
    }
    // Keep consuming digits even after overflow so the caller sees the
    // whole number as parsed.
    ++(*arg);
  }
  return overflow ? kMax : result;
}
/**
 * Returns the argument of the short option at `*options`.
 *
 * If characters follow the option letter ("-p4"), they are the argument.
 * Otherwise `i` is advanced and the next element of `argv` is used.
 *
 * @return The argument, or nullptr (after printing a message to stderr) when
 *         advancing `i` reaches `argc`.
 */
const char *getArgument(const char *options, const char **argv, int &i,
                        int argc) {
  const bool attached = options[1] != 0;
  if (attached) {
    return options + 1;
  }
  const int next = ++i;
  if (next != argc) {
    return argv[next];
  }
  std::fprintf(stderr, "Option -%c requires an argument, but none provided\n",
               *options);
  return nullptr;
}
// Suffix appended to the input name on compression and stripped from it on
// decompression (see Options::getOutputFile).
const std::string kZstdExtension = ".zst";
// "-" on the command line denotes standard input / standard output.
constexpr char kStdIn[] = "-";
constexpr char kStdOut[] = "-";
// Compression level used when none is given.
constexpr unsigned kDefaultCompressionLevel = 3;
// Highest compression level accepted without --ultra.
constexpr unsigned kMaxNonUltraCompressionLevel = 19;
// Null device path; Options::parse uses it as the output file in test mode.
#ifdef _WIN32
const char nullOutput[] = "nul";
#else
const char nullOutput[] = "/dev/null";
#endif
// Prints an "Operation not supported" message naming `option` to stderr.
void notSupported(const char *option) {
std::fprintf(stderr, "Operation not supported: %s\n", option);
}
/// Prints the command-line help text to stderr.
void usage() {
  std::fprintf(stderr, "Usage:\n");
  std::fprintf(stderr, " pzstd [args] [FILE(s)]\n");
  std::fprintf(stderr, "Parallel ZSTD options:\n");
  std::fprintf(stderr, " -p, --processes # : number of threads to use for (de)compression (default:<numcpus>)\n");
  std::fprintf(stderr, "ZSTD options:\n");
  // The level constants are `unsigned`, so they are printed with %u.
  std::fprintf(stderr, " -# : # compression level (1-%u, default:%u)\n", kMaxNonUltraCompressionLevel, kDefaultCompressionLevel);
  std::fprintf(stderr, " -d, --decompress : decompression\n");
  std::fprintf(stderr, " -o file : result stored into `file` (only if 1 input file)\n");
  std::fprintf(stderr, " -f, --force : overwrite output without prompting, (de)compress links\n");
  std::fprintf(stderr, " --rm : remove source file(s) after successful (de)compression\n");
  std::fprintf(stderr, " -k, --keep : preserve source file(s) (default)\n");
  std::fprintf(stderr, " -h, --help : display help and exit\n");
  std::fprintf(stderr, " -V, --version : display version number and exit\n");
  std::fprintf(stderr, " -v, --verbose : verbose mode; specify multiple times to increase log level (default:2)\n");
  std::fprintf(stderr, " -q, --quiet : suppress warnings; specify twice to suppress errors too\n");
  std::fprintf(stderr, " -c, --stdout : force write to standard output, even if it is the console\n");
#ifdef UTIL_HAS_CREATEFILELIST
  std::fprintf(stderr, " -r : operate recursively on directories\n");
#endif
  // First value is the unsigned constant (%u); ZSTD_maxCLevel() keeps %i.
  std::fprintf(stderr, " --ultra : enable levels beyond %u, up to %i (requires more memory)\n", kMaxNonUltraCompressionLevel, ZSTD_maxCLevel());
  std::fprintf(stderr, " -C, --check : integrity check (default)\n");
  std::fprintf(stderr, " --no-check : no integrity check\n");
  std::fprintf(stderr, " -t, --test : test compressed file integrity\n");
  std::fprintf(stderr, " -- : all arguments after \"--\" are treated as files\n");
}
} // anonymous namespace
// Default options: thread count from defaultNumThreads(), window log capped
// at 23, default compression level, compression mode, no overwrite, keep the
// source file, automatic write mode, checksums on, verbosity 2.
Options::Options()
: numThreads(defaultNumThreads()), maxWindowLog(23),
compressionLevel(kDefaultCompressionLevel), decompress(false),
overwrite(false), keepSource(true), writeMode(WriteMode::Auto),
checksum(true), verbosity(2) {}
/**
 * Parses the command line into this Options object.
 *
 * Arguments are processed left to right. Long options are matched first,
 * then mapped to (or read as) bundles of short options; anything else is an
 * input file. After the loop the combination of options is validated and
 * the input list is finalized.
 *
 * @param argc  Number of entries in `argv`.
 * @param argv  Argument vector; `argv[0]` is skipped.
 * @return Status::Success when the options are usable, Status::Message when
 *         help or the version was printed, Status::Failure otherwise (a
 *         message has been written to stderr).
 */
Options::Status Options::parse(int argc, const char **argv) {
  bool test = false;
  bool recursive = false;
  bool ultra = false;
  bool forceStdout = false;
  bool followLinks = false;
  // Local copy of input files, which are pointers into argv.
  std::vector<const char *> localInputFiles;
  for (int i = 1; i < argc; ++i) {
    const char *arg = argv[i];
    // Protect against empty arguments
    if (arg[0] == 0) {
      continue;
    }
    // Everything after "--" is an input file
    if (!std::strcmp(arg, "--")) {
      ++i;
      std::copy(argv + i, argv + argc, std::back_inserter(localInputFiles));
      break;
    }
    // Long arguments that don't have a short option
    {
      bool isLongOption = true;
      if (!std::strcmp(arg, "--rm")) {
        keepSource = false;
      } else if (!std::strcmp(arg, "--ultra")) {
        ultra = true;
        maxWindowLog = 0;
      } else if (!std::strcmp(arg, "--no-check")) {
        checksum = false;
      } else if (!std::strcmp(arg, "--sparse")) {
        writeMode = WriteMode::Sparse;
        notSupported("Sparse mode");
        return Status::Failure;
      } else if (!std::strcmp(arg, "--no-sparse")) {
        writeMode = WriteMode::Regular;
        notSupported("Sparse mode");
        return Status::Failure;
      } else if (!std::strcmp(arg, "--dictID")) {
        notSupported(arg);
        return Status::Failure;
      } else if (!std::strcmp(arg, "--no-dictID")) {
        notSupported(arg);
        return Status::Failure;
      } else {
        isLongOption = false;
      }
      if (isLongOption) {
        continue;
      }
    }
    // Arguments with a short option simply set their short option.
    const char *options = nullptr;
    if (!std::strcmp(arg, "--processes")) {
      options = "p";
    } else if (!std::strcmp(arg, "--version")) {
      options = "V";
    } else if (!std::strcmp(arg, "--help")) {
      options = "h";
    } else if (!std::strcmp(arg, "--decompress")) {
      options = "d";
    } else if (!std::strcmp(arg, "--force")) {
      options = "f";
    } else if (!std::strcmp(arg, "--stdout")) {
      options = "c";
    } else if (!std::strcmp(arg, "--keep")) {
      options = "k";
    } else if (!std::strcmp(arg, "--verbose")) {
      options = "v";
    } else if (!std::strcmp(arg, "--quiet")) {
      options = "q";
    } else if (!std::strcmp(arg, "--check")) {
      options = "C";
    } else if (!std::strcmp(arg, "--test")) {
      options = "t";
    } else if (arg[0] == '-' && arg[1] != 0) {
      options = arg + 1;
    } else {
      localInputFiles.emplace_back(arg);
      continue;
    }
    assert(options != nullptr);

    // `finished` is set by options that consume the rest of the bundle (or
    // the next argv entry) as their argument.
    bool finished = false;
    while (!finished && *options != 0) {
      // Parse the compression level
      if (*options >= '0' && *options <= '9') {
        compressionLevel = parseUnsigned(&options);
        continue;
      }

      switch (*options) {
      case 'h':
      case 'H':
        usage();
        return Status::Message;
      case 'V':
        std::fprintf(stderr, "PZSTD version: %s.\n", ZSTD_VERSION_STRING);
        return Status::Message;
      case 'p': {
        finished = true;
        const char *optionArgument = getArgument(options, argv, i, argc);
        if (optionArgument == nullptr) {
          return Status::Failure;
        }
        if (*optionArgument < '0' || *optionArgument > '9') {
          std::fprintf(stderr, "Option -p expects a number, but %s provided\n",
                       optionArgument);
          return Status::Failure;
        }
        numThreads = parseUnsigned(&optionArgument);
        if (*optionArgument != 0) {
          std::fprintf(stderr,
                       "Option -p expects a number, but %u%s provided\n",
                       numThreads, optionArgument);
          return Status::Failure;
        }
        break;
      }
      case 'o': {
        finished = true;
        const char *optionArgument = getArgument(options, argv, i, argc);
        if (optionArgument == nullptr) {
          return Status::Failure;
        }
        outputFile = optionArgument;
        break;
      }
      case 'C':
        checksum = true;
        break;
      case 'k':
        keepSource = true;
        break;
      case 'd':
        decompress = true;
        break;
      case 'f':
        overwrite = true;
        forceStdout = true;
        followLinks = true;
        break;
      case 't':
        test = true;
        decompress = true;
        break;
#ifdef UTIL_HAS_CREATEFILELIST
      case 'r':
        recursive = true;
        break;
#endif
      case 'c':
        outputFile = kStdOut;
        forceStdout = true;
        break;
      case 'v':
        ++verbosity;
        break;
      case 'q':
        --verbosity;
        break;
      // Unsupported options from Zstd
      case 'D':
      case 's':
        notSupported("Zstd dictionaries.");
        return Status::Failure;
      case 'b':
      case 'e':
      case 'i':
      case 'B':
        notSupported("Zstd benchmarking options.");
        return Status::Failure;
      default:
        std::fprintf(stderr, "Invalid argument: %s\n", arg);
        return Status::Failure;
      }
      if (!finished) {
        ++options;
      }
    } // while (*options != 0);
  }   // for (int i = 1; i < argc; ++i);

  // Set options for test mode
  if (test) {
    outputFile = nullOutput;
    keepSource = true;
  }

  // Input file defaults to standard input if not provided.
  if (localInputFiles.empty()) {
    localInputFiles.emplace_back(kStdIn);
  }

  // Check validity of input files
  if (localInputFiles.size() > 1) {
    const auto it = std::find(localInputFiles.begin(), localInputFiles.end(),
                              std::string{kStdIn});
    if (it != localInputFiles.end()) {
      std::fprintf(
          stderr,
          "Cannot specify standard input when handling multiple files\n");
      return Status::Failure;
    }
  }
  if (localInputFiles.size() > 1 || recursive) {
    if (!outputFile.empty() && outputFile != nullOutput) {
      std::fprintf(
          stderr,
          "Cannot specify an output file when handling multiple inputs\n");
      return Status::Failure;
    }
  }

  g_utilDisplayLevel = verbosity;
  // Remove local input files that are symbolic links
  if (!followLinks) {
    // std::remove_if only moves the kept entries to the front; the tail has
    // to be erased, otherwise the links are still processed and kept entries
    // can appear twice.
    const auto newEnd =
        std::remove_if(localInputFiles.begin(), localInputFiles.end(),
                       [&](const char *path) {
                         bool isLink = UTIL_isLink(path);
                         if (isLink && verbosity >= 2) {
                           std::fprintf(
                               stderr,
                               "Warning : %s is symbolic link, ignoring\n",
                               path);
                         }
                         return isLink;
                       });
    localInputFiles.erase(newEnd, localInputFiles.end());
    // Every input was a link: nothing is left to process, and the code below
    // requires at least one input file.
    if (localInputFiles.empty()) {
      std::fprintf(stderr, "No files found\n");
      return Status::Failure;
    }
  }

  // Translate input files/directories into files to (de)compress
  if (recursive) {
    FileNamesTable* const files = UTIL_createExpandedFNT(localInputFiles.data(), localInputFiles.size(), followLinks);
    if (files == nullptr) {
      std::fprintf(stderr, "Error traversing directories\n");
      return Status::Failure;
    }
    auto guard =
        makeScopeGuard([&] { UTIL_freeFileNamesTable(files); });
    if (files->tableSize == 0) {
      std::fprintf(stderr, "No files found\n");
      return Status::Failure;
    }
    inputFiles.resize(files->tableSize);
    std::copy(files->fileNames, files->fileNames + files->tableSize, inputFiles.begin());
  } else {
    inputFiles.resize(localInputFiles.size());
    std::copy(localInputFiles.begin(), localInputFiles.end(),
              inputFiles.begin());
  }
  localInputFiles.clear();
  assert(!inputFiles.empty());

  // If reading from standard input, default to standard output
  if (inputFiles[0] == kStdIn && outputFile.empty()) {
    assert(inputFiles.size() == 1);
    outputFile = "-";
  }

  if (inputFiles[0] == kStdIn && IS_CONSOLE(stdin)) {
    assert(inputFiles.size() == 1);
    std::fprintf(stderr, "Cannot read input from interactive console\n");
    return Status::Failure;
  }
  if (outputFile == "-" && IS_CONSOLE(stdout) && !(forceStdout && decompress)) {
    std::fprintf(stderr, "Will not write to console stdout unless -c or -f is "
                         "specified and decompressing\n");
    return Status::Failure;
  }

  // Check compression level
  {
    unsigned maxCLevel =
        ultra ? ZSTD_maxCLevel() : kMaxNonUltraCompressionLevel;
    if (compressionLevel > maxCLevel || compressionLevel == 0) {
      std::fprintf(stderr, "Invalid compression level %u.\n", compressionLevel);
      return Status::Failure;
    }
  }

  // Check that numThreads is set
  if (numThreads == 0) {
    std::fprintf(stderr, "Invalid arguments: # of threads not specified "
                         "and unable to determine hardware concurrency.\n");
    return Status::Failure;
  }

  // Modify verbosity
  // If we are piping input and output, turn off interaction
  if (inputFiles[0] == kStdIn && outputFile == kStdOut && verbosity == 2) {
    verbosity = 1;
  }
  // If we are in multi-file mode, turn off interaction
  if (inputFiles.size() > 1 && verbosity == 2) {
    verbosity = 1;
  }

  return Status::Success;
}
/**
 * Determines the output file name for `inputFile`.
 *
 * An explicitly configured `outputFile` always wins. Otherwise the zstd
 * extension is appended when compressing and stripped when decompressing.
 *
 * @return The output name, or an empty string when decompressing an input
 *         that does not consist of a non-empty stem followed by the zstd
 *         extension.
 */
std::string Options::getOutputFile(const std::string &inputFile) const {
  if (!outputFile.empty()) {
    return outputFile;
  }
  if (!decompress) {
    return inputFile + kZstdExtension;
  }
  // Compare sizes as unsigned values instead of narrowing their difference
  // to int; the stem must be non-empty, so equal sizes are rejected too.
  if (inputFile.size() <= kZstdExtension.size()) {
    return "";
  }
  const std::string::size_type stemSize =
      inputFile.size() - kZstdExtension.size();
  if (inputFile.compare(stemSize, std::string::npos, kZstdExtension) != 0) {
    return "";
  }
  return inputFile.substr(0, stemSize);
}
}

View file

@ -0,0 +1,71 @@
/*
* Copyright (c) 2016-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
*/
#pragma once
#define ZSTD_STATIC_LINKING_ONLY
#define ZSTD_DISABLE_DEPRECATE_WARNINGS /* No deprecation warnings, pzstd itself is deprecated
* and uses deprecated functions
*/
#include "zstd.h"
#undef ZSTD_STATIC_LINKING_ONLY
#include <cstdint>
#include <string>
#include <vector>
namespace pzstd {
// Command-line options for pzstd, together with the parser that fills them
// in and helpers derived from them.
struct Options {
enum class WriteMode { Regular, Auto, Sparse };
// Number of (de)compression threads.
unsigned numThreads;
// Upper bound applied to the window log in determineParameters();
// 0 disables the bound.
unsigned maxWindowLog;
unsigned compressionLevel;
// true to decompress, false to compress.
bool decompress;
std::vector<std::string> inputFiles;
// Explicit output file; when empty, getOutputFile() derives a name from the
// input file.
std::string outputFile;
bool overwrite;
bool keepSource;
WriteMode writeMode;
// Passed to zstd as the frame checksum flag.
bool checksum;
int verbosity;
enum class Status {
Success, // Successfully parsed options
Failure, // Failure to parse options
Message // Options specified to print a message (e.g. "-h")
};
Options();
// Member-wise constructor; parameters mirror the fields above.
Options(unsigned numThreads, unsigned maxWindowLog, unsigned compressionLevel,
bool decompress, std::vector<std::string> inputFiles,
std::string outputFile, bool overwrite, bool keepSource,
WriteMode writeMode, bool checksum, int verbosity)
: numThreads(numThreads), maxWindowLog(maxWindowLog),
compressionLevel(compressionLevel), decompress(decompress),
inputFiles(std::move(inputFiles)), outputFile(std::move(outputFile)),
overwrite(overwrite), keepSource(keepSource), writeMode(writeMode),
checksum(checksum), verbosity(verbosity) {}
Status parse(int argc, const char **argv);
// Builds the zstd parameters for `compressionLevel`: the content size flag
// is cleared, the checksum flag follows `checksum`, and the window log is
// reduced to `maxWindowLog` (followed by ZSTD_adjustCParams) when a bound is
// set and exceeded.
ZSTD_parameters determineParameters() const {
ZSTD_parameters params = ZSTD_getParams(compressionLevel, 0, 0);
params.fParams.contentSizeFlag = 0;
params.fParams.checksumFlag = checksum;
if (maxWindowLog != 0 && params.cParams.windowLog > maxWindowLog) {
params.cParams.windowLog = maxWindowLog;
params.cParams = ZSTD_adjustCParams(params.cParams, 0, 0);
}
return params;
}
std::string getOutputFile(const std::string &inputFile) const;
};
}

View file

@ -0,0 +1,611 @@
/*
* Copyright (c) 2016-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
*/
#include "platform.h" /* Large Files support, SET_BINARY_MODE */
#include "Pzstd.h"
#include "SkippableFrame.h"
#include "utils/FileSystem.h"
#include "utils/Range.h"
#include "utils/ScopeGuard.h"
#include "utils/ThreadPool.h"
#include "utils/WorkQueue.h"
#include <chrono>
#include <cinttypes>
#include <cstddef>
#include <cstdio>
#include <memory>
#include <string>
namespace pzstd {
namespace {
// Null device path; openOutputFile() skips the overwrite check for it.
#ifdef _WIN32
const std::string nullOutput = "nul";
#else
const std::string nullOutput = "/dev/null";
#endif
}
using std::size_t;
/// Returns the size of `file`, or 0 when `file` is "-" (standard input) or
/// when querying the size reports an error.
static std::uintmax_t fileSizeOrZero(const std::string &file) {
  if (file == "-") {
    return 0;
  }
  std::error_code ec;
  const auto size = file_size(file, ec);
  return ec ? std::uintmax_t{0} : static_cast<std::uintmax_t>(size);
}
/**
 * (De)compresses one already opened input into one already opened output.
 *
 * A reader job is queued on a one-thread pool; it reads `inputFd` and starts
 * the (de)compression jobs on the worker pool. The calling thread writes the
 * results to `outputFd`. Unless an error was recorded, a summary line is
 * logged at info level.
 *
 * @return The number of bytes written, as reported by writeFile().
 */
static std::uint64_t handleOneInput(const Options &options,
                                    const std::string &inputFile,
                                    FILE* inputFd,
                                    const std::string &outputFile,
                                    FILE* outputFd,
                                    SharedState& state) {
  auto inputSize = fileSizeOrZero(inputFile);
  // WorkQueue outlives ThreadPool so in the case of error we are certain
  // we don't accidentally try to call push() on it after it is destroyed
  WorkQueue<std::shared_ptr<BufferWorkQueue>> outs{options.numThreads + 1};
  std::uint64_t bytesRead;
  std::uint64_t bytesWritten;
  {
    // Initialize the (de)compression thread pool with numThreads
    ThreadPool executor(options.numThreads);
    // Run the reader thread on an extra thread
    ThreadPool readExecutor(1);
    if (options.decompress) {
      // Add a job that reads the input and starts all the decompression jobs
      readExecutor.add([&state, &outs, &executor, inputFd, &bytesRead] {
        bytesRead = asyncDecompressFrames(state, outs, executor, inputFd);
      });
    } else {
      // Add a job that reads the input and starts all the compression jobs
      readExecutor.add(
          [&state, &outs, &executor, inputFd, inputSize, &options, &bytesRead] {
            bytesRead = asyncCompressChunks(
                state,
                outs,
                executor,
                inputFd,
                inputSize,
                options.numThreads,
                options.determineParameters());
          });
    }
    // Start writing; this is the same call in both modes.
    bytesWritten = writeFile(state, outs, outputFd, options.decompress);
  }
  if (state.errorHolder.hasError()) {
    return bytesWritten;
  }

  const std::string inputFileName = inputFile == "-" ? "stdin" : inputFile;
  const std::string outputFileName = outputFile == "-" ? "stdout" : outputFile;
  if (options.decompress) {
    state.log(kLogInfo, "%-20s: %" PRIu64 " bytes \n",
              inputFileName.c_str(), bytesWritten);
    return bytesWritten;
  }
  // `bytesRead + !bytesRead` turns a zero divisor into one.
  const double ratio = static_cast<double>(bytesWritten) /
                       static_cast<double>(bytesRead + !bytesRead);
  state.log(kLogInfo, "%-20s :%6.2f%% (%6" PRIu64 " => %6" PRIu64
                      " bytes, %s)\n",
            inputFileName.c_str(), ratio * 100, bytesRead, bytesWritten,
            outputFileName.c_str());
  return bytesWritten;
}
/**
 * Opens `inputFile` for binary reading.
 *
 * "-" selects standard input, which is switched to binary mode. Directories
 * are rejected.
 *
 * @return The opened stream, or nullptr after recording an error in
 *         `errorHolder`.
 */
static FILE *openInputFile(const std::string &inputFile,
                           ErrorHolder &errorHolder) {
  if (inputFile == "-") {
    SET_BINARY_MODE(stdin);
    return stdin;
  }
  // Check if input file is a directory
  {
    std::error_code ec;
    if (is_directory(inputFile, ec)) {
      // The path being checked here is the input, so the message says so.
      errorHolder.setError("Input file is a directory -- ignored");
      return nullptr;
    }
  }
  auto inputFd = std::fopen(inputFile.c_str(), "rb");
  if (!errorHolder.check(inputFd != nullptr, "Failed to open input file")) {
    return nullptr;
  }
  return inputFd;
}
/**
 * Opens `outputFile` for binary writing.
 *
 * "-" selects standard output, which is switched to binary mode. Unless
 * overwriting is enabled or the target is the null device, an existing file
 * is only replaced after the user confirms on standard input; when info
 * level logging is disabled no prompt is shown and an error is recorded.
 *
 * @return The opened stream, or nullptr after recording an error in
 *         `state.errorHolder`.
 */
static FILE *openOutputFile(const Options &options,
                            const std::string &outputFile,
                            SharedState& state) {
  if (outputFile == "-") {
    SET_BINARY_MODE(stdout);
    return stdout;
  }
  // Check if the output file exists and then open it
  if (!options.overwrite && outputFile != nullOutput) {
    auto outputFd = std::fopen(outputFile.c_str(), "rb");
    if (outputFd != nullptr) {
      std::fclose(outputFd);
      if (!state.log.logsAt(kLogInfo)) {
        state.errorHolder.setError("Output file exists");
        return nullptr;
      }
      state.log(
          kLogInfo,
          "pzstd: %s already exists; do you wish to overwrite (y/n) ? ",
          outputFile.c_str());
      int c = getchar();
      // Discard the rest of the answer line so a leftover newline is not
      // taken as the answer to the next prompt.
      for (int rest = c; rest != '\n' && rest != EOF;) {
        rest = getchar();
      }
      if (c != 'y' && c != 'Y') {
        state.errorHolder.setError("Not overwritten");
        return nullptr;
      }
    }
  }
  auto outputFd = std::fopen(outputFile.c_str(), "wb");
  if (!state.errorHolder.check(
          outputFd != nullptr, "Failed to open output file")) {
    return nullptr;
  }
  return outputFd;
}
/**
 * Runs pzstd over every input file in `options`.
 *
 * For each input, the input and output files are opened, the data is
 * (de)compressed, and the input is removed afterwards when `keepSource` is
 * false. A failure is reported through the logger and processing continues
 * with the next input.
 *
 * @return 0 if every file succeeded, 1 if any file failed.
 */
int pzstdMain(const Options &options) {
  int returnCode = 0;
  SharedState state(options);
  for (const auto& input : options.inputFiles) {
    // Setup the shared state
    // Declared first so it runs last: reports and clears any error recorded
    // while handling this input.
    auto printErrorGuard = makeScopeGuard([&] {
      if (state.errorHolder.hasError()) {
        returnCode = 1;
        state.log(kLogError, "pzstd: %s: %s.\n", input.c_str(),
                  state.errorHolder.getError().c_str());
      }
    });
    // Open the input file
    auto inputFd = openInputFile(input, state.errorHolder);
    if (inputFd == nullptr) {
      continue;
    }
    auto closeInputGuard = makeScopeGuard([&] { std::fclose(inputFd); });
    // Open the output file
    auto outputFile = options.getOutputFile(input);
    if (!state.errorHolder.check(outputFile != "",
                                 "Input file does not have extension .zst")) {
      continue;
    }
    auto outputFd = openOutputFile(options, outputFile, state);
    if (outputFd == nullptr) {
      continue;
    }
    auto closeOutputGuard = makeScopeGuard([&] { std::fclose(outputFd); });
    // (de)compress the file
    handleOneInput(options, input, inputFd, outputFile, outputFd, state);
    if (state.errorHolder.hasError()) {
      continue;
    }
    // Delete the input file if necessary
    if (!options.keepSource) {
      // Be sure that we are done and have written everything before we delete
      // Each guard is dismissed before the explicit fclose(): the stream must
      // not be closed a second time by the guard if fclose() reports failure.
      closeInputGuard.dismiss();
      if (!state.errorHolder.check(std::fclose(inputFd) == 0,
                                   "Failed to close input file")) {
        continue;
      }
      closeOutputGuard.dismiss();
      if (!state.errorHolder.check(std::fclose(outputFd) == 0,
                                   "Failed to close output file")) {
        continue;
      }
      if (std::remove(input.c_str()) != 0) {
        state.errorHolder.setError("Failed to remove input file");
        continue;
      }
    }
  }
  // Returns 1 if any of the files failed to (de)compress.
  return returnCode;
}
/// Builds a `ZSTD_inBuffer` covering all of `buffer`, with the read position
/// at the start.
static ZSTD_inBuffer makeZstdInBuffer(const Buffer& buffer) {
  ZSTD_inBuffer zstdIn;
  zstdIn.src = buffer.data();
  zstdIn.size = buffer.size();
  zstdIn.pos = 0;
  return zstdIn;
}
/**
 * Drops the bytes zstd has consumed, as given by `inBuffer.pos`, from both
 * views: `inBuffer` is moved forward and its position reset to 0, and
 * `buffer` is advanced by the same amount.
 */
void advance(Buffer& buffer, ZSTD_inBuffer& inBuffer) {
  const auto consumed = inBuffer.pos;
  const auto* base = static_cast<const unsigned char*>(inBuffer.src);
  inBuffer.src = base + consumed;
  inBuffer.size -= consumed;
  inBuffer.pos = 0;
  buffer.advance(consumed);
}
/// Builds a `ZSTD_outBuffer` covering all of `buffer`, with the write
/// position at the start.
static ZSTD_outBuffer makeZstdOutBuffer(Buffer& buffer) {
  ZSTD_outBuffer zstdOut;
  zstdOut.dst = buffer.data();
  zstdOut.size = buffer.size();
  zstdOut.pos = 0;
  return zstdOut;
}
/**
 * Splits `buffer` at `outBuffer.pos`, the number of bytes zstd has written,
 * and returns the result of that split. `outBuffer` is moved forward past
 * those bytes and its position reset to 0.
 */
Buffer split(Buffer& buffer, ZSTD_outBuffer& outBuffer) {
  const auto produced = outBuffer.pos;
  auto* base = static_cast<unsigned char*>(outBuffer.dst);
  outBuffer.dst = base + produced;
  outBuffer.size -= produced;
  outBuffer.pos = 0;
  return buffer.splitAt(produced);
}
/**
 * Stream chunks of input from `in`, compress it, and stream it out to `out`.
 *
 * `out->finish()` is called on every exit path through a scope guard. Any
 * failure is recorded in `state.errorHolder` and ends the function early.
 *
 * @param state The shared state
 * @param in Queue that we `pop()` input buffers from
 * @param out Queue that we `push()` compressed output buffers to
 * @param maxInputSize An upper bound on the size of the input
 */
static void compress(
SharedState& state,
std::shared_ptr<BufferWorkQueue> in,
std::shared_ptr<BufferWorkQueue> out,
size_t maxInputSize) {
auto& errorHolder = state.errorHolder;
auto guard = makeScopeGuard([&] { out->finish(); });
// Initialize the CCtx
auto ctx = state.cStreamPool->get();
if (!errorHolder.check(ctx != nullptr, "Failed to allocate ZSTD_CStream")) {
return;
}
{
auto err = ZSTD_CCtx_reset(ctx.get(), ZSTD_reset_session_only);
if (!errorHolder.check(!ZSTD_isError(err), ZSTD_getErrorName(err))) {
return;
}
}
// Allocate space for the result
// One buffer of ZSTD_compressBound(maxInputSize) bytes is allocated up front;
// compressed pieces are split off its front as they are produced.
auto outBuffer = Buffer(ZSTD_compressBound(maxInputSize));
auto zstdOutBuffer = makeZstdOutBuffer(outBuffer);
{
Buffer inBuffer;
// Read a buffer in from the input queue
while (in->pop(inBuffer) && !errorHolder.hasError()) {
auto zstdInBuffer = makeZstdInBuffer(inBuffer);
// Compress the whole buffer and send it to the output queue
while (!inBuffer.empty() && !errorHolder.hasError()) {
// An empty output buffer here means the up-front allocation was used up.
if (!errorHolder.check(
!outBuffer.empty(), "ZSTD_compressBound() was too small")) {
return;
}
// Compress
auto err =
ZSTD_compressStream(ctx.get(), &zstdOutBuffer, &zstdInBuffer);
if (!errorHolder.check(!ZSTD_isError(err), ZSTD_getErrorName(err))) {
return;
}
// Split the compressed data off outBuffer and pass to the output queue
out->push(split(outBuffer, zstdOutBuffer));
// Forget about the data we already compressed
advance(inBuffer, zstdInBuffer);
}
}
}
// Write the epilog
// ZSTD_endStream() is called repeatedly until it returns 0.
size_t bytesLeft;
do {
if (!errorHolder.check(
!outBuffer.empty(), "ZSTD_compressBound() was too small")) {
return;
}
bytesLeft = ZSTD_endStream(ctx.get(), &zstdOutBuffer);
if (!errorHolder.check(
!ZSTD_isError(bytesLeft), ZSTD_getErrorName(bytesLeft))) {
return;
}
out->push(split(outBuffer, zstdOutBuffer));
} while (bytesLeft != 0 && !errorHolder.hasError());
}
/**
 * Calculates how large each independently compressed frame should be.
 *
 * The result is 2^(windowLog + 2) bytes. `size` and `numThreads` are
 * currently not used.
 *
 * @param size        The size of the source if known, 0 otherwise
 * @param numThreads  The number of threads available to run compression jobs
 * @param params      The zstd parameters to be used for compression
 */
static size_t calculateStep(
    std::uintmax_t size,
    size_t numThreads,
    const ZSTD_parameters &params) {
  (void)size;
  (void)numThreads;
  const auto shift = params.cParams.windowLog + 2;
  return size_t{1} << shift;
}
namespace {
/// State of an input stream after a read.
enum class FileStatus { Continue, Done, Error };

/// Classifies the stream `fd`: Done at end-of-file, Error if the error
/// indicator is set, Continue otherwise. End-of-file is checked first.
FileStatus fileStatus(FILE* fd) {
  if (std::feof(fd)) {
    return FileStatus::Done;
  }
  return std::ferror(fd) ? FileStatus::Error : FileStatus::Continue;
}
} // anonymous namespace
/**
 * Reads up to `size` bytes from `fd` in pieces of at most `chunkSize` bytes
 * and pushes each piece onto `queue`. The bytes read are added to
 * `*totalBytesRead`.
 *
 * Stops early at end-of-file or on a read error.
 *
 * @return The file status after the last read, or Continue when all `size`
 *         bytes were read.
 */
static FileStatus
readData(BufferWorkQueue& queue, size_t chunkSize, size_t size, FILE* fd,
         std::uint64_t *totalBytesRead) {
  Buffer buffer(size);
  auto status = FileStatus::Continue;
  while (status == FileStatus::Continue && !buffer.empty()) {
    const auto want = std::min(chunkSize, buffer.size());
    const auto got = std::fread(buffer.data(), 1, want, fd);
    *totalBytesRead += got;
    // Hand the bytes just read to the consumer; `buffer` keeps the rest.
    queue.push(buffer.splitAt(got));
    status = fileStatus(fd);
  }
  return status;
}
/**
 * Reads `fd` in frame-sized steps and starts one compression job per step.
 *
 * For every step a fresh input queue and output queue are created, a
 * compress() job is queued on `executor`, the output queue is pushed onto
 * `chunks`, and the step's input is read into the input queue.
 * `chunks.finish()` is called on exit through a scope guard.
 *
 * @param state       The shared state
 * @param chunks      Queue of per-frame output queues
 * @param executor    Thread pool the compression jobs are added to
 * @param fd          Input file
 * @param size        Passed to calculateStep()
 * @param numThreads  Passed to calculateStep()
 * @param params      The zstd parameters, passed to calculateStep()
 * @return The total number of bytes read from `fd`.
 */
std::uint64_t asyncCompressChunks(
SharedState& state,
WorkQueue<std::shared_ptr<BufferWorkQueue>>& chunks,
ThreadPool& executor,
FILE* fd,
std::uintmax_t size,
size_t numThreads,
ZSTD_parameters params) {
auto chunksGuard = makeScopeGuard([&] { chunks.finish(); });
std::uint64_t bytesRead = 0;
// Break the input up into chunks of size `step` and compress each chunk
// independently.
size_t step = calculateStep(size, numThreads, params);
state.log(kLogDebug, "Chosen frame size: %zu\n", step);
auto status = FileStatus::Continue;
while (status == FileStatus::Continue && !state.errorHolder.hasError()) {
// Make a new input queue that we will put the chunk's input data into.
auto in = std::make_shared<BufferWorkQueue>();
// Finishes the input queue at the end of each iteration.
auto inGuard = makeScopeGuard([&] { in->finish(); });
// Make a new output queue that compress will put the compressed data into.
auto out = std::make_shared<BufferWorkQueue>();
// Start compression in the thread pool
executor.add([&state, in, out, step] {
return compress(
state, std::move(in), std::move(out), step);
});
// Pass the output queue to the writer thread.
chunks.push(std::move(out));
state.log(kLogVerbose, "%s\n", "Starting a new frame");
// Fill the input queue for the compression job we just started
status = readData(*in, ZSTD_CStreamInSize(), step, fd, &bytesRead);
}
state.errorHolder.check(status != FileStatus::Error, "Error reading input");
return bytesRead;
}
/**
 * Decompress a frame, whose data is streamed into `in`, and stream the output
 * to `out`.
 *
 * Any zstd error is recorded in `state.errorHolder` and ends the job early.
 * A scope guard calls `out->finish()` for every exit path.
 *
 * @param state        The shared state
 * @param in           Queue that we `pop()` input buffers from.  It contains
 *                      exactly one compressed frame.
 * @param out          Queue that we `push()` decompressed output buffers to
 */
static void decompress(
    SharedState& state,
    std::shared_ptr<BufferWorkQueue> in,
    std::shared_ptr<BufferWorkQueue> out) {
  auto& errorHolder = state.errorHolder;
  auto guard = makeScopeGuard([&] { out->finish(); });
  // Initialize the DCtx
  auto ctx = state.dStreamPool->get();
  if (!errorHolder.check(ctx != nullptr, "Failed to allocate ZSTD_DStream")) {
    return;
  }
  {
    auto err = ZSTD_DCtx_reset(ctx.get(), ZSTD_reset_session_only);
    if (!errorHolder.check(!ZSTD_isError(err), ZSTD_getErrorName(err))) {
      return;
    }
  }

  const size_t outSize = ZSTD_DStreamOutSize();
  Buffer inBuffer;
  size_t returnCode = 0;
  // Read a buffer in from the input queue
  while (in->pop(inBuffer) && !errorHolder.hasError()) {
    auto zstdInBuffer = makeZstdInBuffer(inBuffer);
    // Decompress the whole buffer and send it to the output queue
    while (!inBuffer.empty() && !errorHolder.hasError()) {
      // Allocate a buffer with at least outSize bytes.
      Buffer outBuffer(outSize);
      auto zstdOutBuffer = makeZstdOutBuffer(outBuffer);
      // Decompress
      returnCode =
          ZSTD_decompressStream(ctx.get(), &zstdOutBuffer, &zstdInBuffer);
      if (!errorHolder.check(
              !ZSTD_isError(returnCode), ZSTD_getErrorName(returnCode))) {
        return;
      }
      // Pass the buffer with the decompressed data to the output queue
      out->push(split(outBuffer, zstdOutBuffer));
      // Advance past the input we already read
      advance(inBuffer, zstdInBuffer);
      if (returnCode == 0) {
        // The frame is over, prepare to (maybe) start a new frame.
        // Re-initialization can fail; report it instead of continuing to
        // decode with a context in an unknown state.
        const size_t initResult = ZSTD_initDStream(ctx.get());
        if (!errorHolder.check(
                !ZSTD_isError(initResult), ZSTD_getErrorName(initResult))) {
          return;
        }
      }
    }
  }
  if (!errorHolder.check(returnCode <= 1, "Incomplete block")) {
    return;
  }
  // We've given ZSTD_decompressStream all of our data, but there may still
  // be data to read.
  while (returnCode == 1) {
    // Allocate a buffer with at least outSize bytes.
    Buffer outBuffer(outSize);
    auto zstdOutBuffer = makeZstdOutBuffer(outBuffer);
    // Pass in no input.
    ZSTD_inBuffer zstdInBuffer{nullptr, 0, 0};
    // Decompress
    returnCode =
        ZSTD_decompressStream(ctx.get(), &zstdOutBuffer, &zstdInBuffer);
    if (!errorHolder.check(
            !ZSTD_isError(returnCode), ZSTD_getErrorName(returnCode))) {
      return;
    }
    // Pass the buffer with the decompressed data to the output queue
    out->push(split(outBuffer, zstdOutBuffer));
  }
}
/**
 * Splits the input read from `fd` into frames and starts a decompression job
 * for each of them.
 *
 * Before every frame the function tries to read a pzstd skippable frame that
 * carries the size of the next frame. When that succeeds, exactly that many
 * bytes are handed to a new job. When it fails, the rest of the input is
 * streamed to a single job with bounded queues.
 * A read error is recorded in `state.errorHolder`.
 *
 * @param state     The shared state
 * @param frames    Receives the output queue of every job, in order
 * @param executor  The thread pool to run decompression jobs in
 * @param fd        The input stream
 * @returns         The number of bytes read from `fd`
 */
std::uint64_t asyncDecompressFrames(
    SharedState& state,
    WorkQueue<std::shared_ptr<BufferWorkQueue>>& frames,
    ThreadPool& executor,
    FILE* fd) {
  auto framesGuard = makeScopeGuard([&] { frames.finish(); });
  std::uint64_t totalBytesRead = 0;

  // Split the source up into its component frames.
  // If we find our recognized skippable frame we know the next frame's size,
  // which means that we can decompress each standard frame independently.
  // Otherwise, we will decompress using only one decompression task.
  const size_t chunkSize = ZSTD_DStreamInSize();
  auto status = FileStatus::Continue;
  while (status == FileStatus::Continue && !state.errorHolder.hasError()) {
    // Make a new input queue that we will put the frame's bytes into.
    auto in = std::make_shared<BufferWorkQueue>();
    auto inGuard = makeScopeGuard([&] { in->finish(); });
    // Make an output queue that decompress will put the decompressed data into
    auto out = std::make_shared<BufferWorkQueue>();

    size_t frameSize;
    {
      // Calculate the size of the next frame.
      // frameSize is 0 if the frame info can't be decoded.
      Buffer buffer(SkippableFrame::kSize);
      auto bytesRead = std::fread(buffer.data(), 1, buffer.size(), fd);
      totalBytesRead += bytesRead;
      status = fileStatus(fd);
      if (bytesRead == 0 && status != FileStatus::Continue) {
        break;
      }
      // Trim the buffer to the bytes actually read; they are forwarded to the
      // job either way because they are part of the input stream.
      buffer.subtract(buffer.size() - bytesRead);
      frameSize = SkippableFrame::tryRead(buffer.range());
      in->push(std::move(buffer));
    }
    if (frameSize == 0) {
      // We hit a non SkippableFrame, so this will be the last job.
      // Make sure that we don't use too much memory
      in->setMaxSize(64);
      out->setMaxSize(64);
    }
    // Start decompression in the thread pool
    executor.add([&state, in, out] {
      return decompress(state, std::move(in), std::move(out));
    });
    // Pass the output queue to the writer thread
    frames.push(std::move(out));
    if (frameSize == 0) {
      // We hit a non SkippableFrame ==> not compressed by pzstd or corrupted
      // Pass the rest of the source to this decompression task
      state.log(kLogVerbose, "%s\n",
          "Input not in pzstd format, falling back to serial decompression");
      while (status == FileStatus::Continue && !state.errorHolder.hasError()) {
        status = readData(*in, chunkSize, chunkSize, fd, &totalBytesRead);
      }
      break;
    }
    // Terminate the line like every other log message in this file does.
    state.log(kLogVerbose, "Decompressing a frame of size %zu\n", frameSize);
    // Fill the input queue for the decompression job we just started
    status = readData(*in, chunkSize, frameSize, fd, &totalBytesRead);
  }
  state.errorHolder.check(status != FileStatus::Error, "Error reading input");
  return totalBytesRead;
}
/// Writes all of `data` to `fd`, retrying after short writes.
/// Returns true iff everything was written without the stream's error
/// indicator being set.
static bool writeData(ByteRange data, FILE* fd) {
  for (;;) {
    if (data.empty()) {
      return true;
    }
    const auto written = std::fwrite(data.begin(), 1, data.size(), fd);
    data.advance(written);
    if (std::ferror(fd)) {
      return false;
    }
  }
}
/**
 * Pops the per-job output queues from `outs` in order and writes their
 * contents to `outputFd`. When compressing, a skippable frame holding the
 * size of the job's output is written in front of each job's data.
 *
 * Once an error has been recorded the remaining queues are still popped but
 * nothing more is written. A failed write records "Failed to write output"
 * and returns immediately.
 *
 * @returns The number of bytes written
 */
std::uint64_t writeFile(
    SharedState& state,
    WorkQueue<std::shared_ptr<BufferWorkQueue>>& outs,
    FILE* outputFd,
    bool decompress) {
  auto& errors = state.errorHolder;
  auto clearProgressLine = makeScopeGuard([&state] {
    state.log.clear(kLogInfo);
  });
  std::uint64_t total = 0;
  std::shared_ptr<BufferWorkQueue> job;
  // Grab the output queue for each (de)compression job (in order).
  while (outs.pop(job)) {
    if (!errors.hasError()) {
      if (!decompress) {
        // The skippable frame records the compressed size, so it can only be
        // written once the job's output size is available.
        SkippableFrame header(job->size());
        const bool headerOk = writeData(header.data(), outputFd);
        if (!headerOk) {
          errors.setError("Failed to write output");
          return total;
        }
        total += SkippableFrame::kSize;
      }
      // Write the job's output piece by piece.
      Buffer piece;
      while (job->pop(piece) && !errors.hasError()) {
        const bool pieceOk = writeData(piece.range(), outputFd);
        if (!pieceOk) {
          errors.setError("Failed to write output");
          return total;
        }
        total += piece.size();
        const auto megabytes = static_cast<std::uint32_t>(total >> 20);
        state.log.update(kLogInfo, "Written: %u MB ", megabytes);
      }
    }
  }
  return total;
}
}

View file

@ -0,0 +1,153 @@
/*
* Copyright (c) 2016-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
*/
#pragma once
#include "ErrorHolder.h"
#include "Logging.h"
#include "Options.h"
#include "utils/Buffer.h"
#include "utils/Range.h"
#include "utils/ResourcePool.h"
#include "utils/ThreadPool.h"
#include "utils/WorkQueue.h"
#define ZSTD_STATIC_LINKING_ONLY
#define ZSTD_DISABLE_DEPRECATE_WARNINGS /* No deprecation warnings, pzstd itself is deprecated
* and uses deprecated functions
*/
#include "zstd.h"
#undef ZSTD_STATIC_LINKING_ONLY
#include <cstddef>
#include <cstdint>
#include <memory>
namespace pzstd {
/**
 * Runs pzstd with `options`.
 *
 * @param options The pzstd options to use for (de)compression
 * @returns 0 upon success and non-zero on failure.
 */
int pzstdMain(const Options& options);
/**
 * State shared by the jobs of one pzstd run: the logger, the error holder and
 * a pool of zstd streams.
 *
 * Only one of the two pools is created: `cStreamPool` when compressing,
 * `dStreamPool` when decompressing. The other pointer stays empty.
 */
class SharedState {
public:
// Builds the pool that matches `options.decompress`.
SharedState(const Options& options) : log(options.verbosity) {
if (!options.decompress) {
auto parameters = options.determineParameters();
cStreamPool.reset(new ResourcePool<ZSTD_CStream>{
// Factory: creates and initializes a compression stream.
// Returns nullptr if creation or initialization fails.
[this, parameters]() -> ZSTD_CStream* {
this->log(kLogVerbose, "%s\n", "Creating new ZSTD_CStream");
auto zcs = ZSTD_createCStream();
if (zcs) {
// NOTE(review): per the zstd API the nullptr/0 pair presumably means
// "no dictionary" and the trailing 0 an unknown source size -- confirm.
auto err = ZSTD_initCStream_advanced(
zcs, nullptr, 0, parameters, 0);
if (ZSTD_isError(err)) {
ZSTD_freeCStream(zcs);
return nullptr;
}
}
return zcs;
},
// Deleter: releases a compression stream.
[](ZSTD_CStream *zcs) {
ZSTD_freeCStream(zcs);
}});
} else {
dStreamPool.reset(new ResourcePool<ZSTD_DStream>{
// Factory: creates and initializes a decompression stream.
// Returns nullptr if creation or initialization fails.
[this]() -> ZSTD_DStream* {
this->log(kLogVerbose, "%s\n", "Creating new ZSTD_DStream");
auto zds = ZSTD_createDStream();
if (zds) {
auto err = ZSTD_initDStream(zds);
if (ZSTD_isError(err)) {
ZSTD_freeDStream(zds);
return nullptr;
}
}
return zds;
},
// Deleter: releases a decompression stream.
[](ZSTD_DStream *zds) {
ZSTD_freeDStream(zds);
}});
}
}
~SharedState() {
// The resource pools have references to this, so destroy them first.
cStreamPool.reset();
dStreamPool.reset();
}
// Logger constructed from `options.verbosity`.
Logger log;
// Collects the errors reported by the jobs.
ErrorHolder errorHolder;
// Set only when compressing.
std::unique_ptr<ResourcePool<ZSTD_CStream>> cStreamPool;
// Set only when decompressing.
std::unique_ptr<ResourcePool<ZSTD_DStream>> dStreamPool;
};
/**
 * Streams input from `fd`, breaks input up into chunks, and compresses each
 * chunk independently. Output of each chunk gets streamed to a queue, and
 * the output queues get put into `chunks` in order.
 *
 * @param state The shared state
 * @param chunks Each compression job's output queue gets pushed here
 * as soon as it is available
 * @param executor The thread pool to run compression jobs in
 * @param fd The input file descriptor
 * @param size The size of the input file if known, 0 otherwise
 * @param numThreads The number of threads in the thread pool
 * @param parameters The zstd parameters to use for compression
 * @returns The number of bytes read from the file
 */
std::uint64_t asyncCompressChunks(
SharedState& state,
WorkQueue<std::shared_ptr<BufferWorkQueue>>& chunks,
ThreadPool& executor,
FILE* fd,
std::uintmax_t size,
std::size_t numThreads,
ZSTD_parameters parameters);
/**
 * Streams input from `fd`. If pzstd headers are available it breaks the input
 * up into independent frames. It sends each frame to an independent
 * decompression job. Output of each frame gets streamed to a queue, and
 * the output queues get put into `frames` in order.
 *
 * @param state The shared state
 * @param frames Each decompression job's output queue gets pushed here
 * as soon as it is available
 * @param executor The thread pool to run decompression jobs in
 * @param fd The input file descriptor
 * @returns The number of bytes read from the file
 */
std::uint64_t asyncDecompressFrames(
SharedState& state,
WorkQueue<std::shared_ptr<BufferWorkQueue>>& frames,
ThreadPool& executor,
FILE* fd);
/**
 * Pops each queue from `outs` in order and writes the data it holds to
 * `outputFd`.
 *
 * @param state The shared state
 * @param outs A queue of output queues, one for each
 * (de)compression job.
 * @param outputFd The file descriptor to write to
 * @param decompress Are we decompressing? When false, a skippable frame
 * is written before each job's output.
 * @returns The number of bytes written
 */
std::uint64_t writeFile(
SharedState& state,
WorkQueue<std::shared_ptr<BufferWorkQueue>>& outs,
FILE* outputFd,
bool decompress);
}

View file

@ -0,0 +1,56 @@
# Parallel Zstandard (PZstandard)
Parallel Zstandard is a Pigz-like tool for Zstandard.
It provides Zstandard format compatible compression and decompression that is able to utilize multiple cores.
It breaks the input up into equal sized chunks and compresses each chunk independently into a Zstandard frame.
It then concatenates the frames together to produce the final compressed output.
PZstandard writes a 12-byte header before each frame. The header is a skippable frame in the Zstandard format, and it tells PZstandard the size of the next compressed frame.
PZstandard supports parallel decompression of files compressed with PZstandard.
When decompressing files compressed with Zstandard, PZstandard does IO in one thread, and decompression in another.
## Usage
PZstandard supports the same command line interface as Zstandard, but also provides the `-p` option to specify the number of threads.
Dictionary mode is not currently supported.
Basic usage
pzstd input-file -o output-file -p num-threads -# # Compression
pzstd -d input-file -o output-file -p num-threads # Decompression
PZstandard also supports pipes and FIFOs
cat input-file | pzstd -p num-threads -# -c > /dev/null
For more options
pzstd --help
PZstandard tries to pick a smart default number of threads if not specified (displayed in `pzstd --help`).
If this number is not suitable, during compilation you can define `PZSTD_NUM_THREADS` to the number of threads you prefer.
## Benchmarks
As a reference, PZstandard and Pigz were compared on an Intel Core i7 @ 3.1 GHz, each using 4 threads, with the [Silesia compression corpus](http://sun.aei.polsl.pl/~sdeor/index.php?page=silesia).
Compression Speed vs Ratio with 4 Threads | Decompression Speed with 4 Threads
------------------------------------------|-----------------------------------
![Compression Speed vs Ratio](images/Cspeed.png "Compression Speed vs Ratio") | ![Decompression Speed](images/Dspeed.png "Decompression Speed")
The test procedure was to run each of the following commands 2 times for each compression level, and take the minimum time.
time pzstd -# -p 4 -c silesia.tar > silesia.tar.zst
time pzstd -d -p 4 -c silesia.tar.zst > /dev/null
time pigz -# -p 4 -k -c silesia.tar > silesia.tar.gz
time pigz -d -p 4 -k -c silesia.tar.gz > /dev/null
PZstandard was tested using compression levels 1-19, and Pigz was tested using compression levels 1-9.
Pigz cannot do parallel decompression; it simply does each of reading, decompression, and writing on separate threads.
## Tests
Tests require that you have [gtest](https://github.com/google/googletest) installed.
Set `GTEST_INC` and `GTEST_LIB` in `Makefile` to specify the location of the gtest headers and libraries.
Alternatively, run `make googletest`, which will clone googletest and build it.
Run `make tests && make check` to run tests.

View file

@ -0,0 +1,30 @@
/*
* Copyright (c) 2016-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
*/
#include "SkippableFrame.h"
#include "mem.h"
#include "utils/Range.h"
#include <cstdio>
using namespace pzstd;
// Serializes the header into data_ as three little-endian 32-bit fields:
// the magic number, the size of the frame contents, and the size of the
// next frame.
SkippableFrame::SkippableFrame(std::uint32_t size) : frameSize_(size) {
  const auto put = [this](std::size_t offset, std::uint32_t value) {
    MEM_writeLE32(data_.data() + offset, value);
  };
  put(0, kSkippableFrameMagicNumber);
  put(4, kFrameContentsSize);
  put(8, frameSize_);
}
// Parses a skippable frame header from `bytes`.
// Returns the recorded size of the next frame, or 0 when `bytes` is too
// short or does not start with the expected magic number and contents size.
/* static */ std::size_t SkippableFrame::tryRead(ByteRange bytes) {
  if (bytes.size() < SkippableFrame::kSize) {
    return 0;
  }
  const auto header = bytes.begin();
  if (MEM_readLE32(header) != kSkippableFrameMagicNumber) {
    return 0;
  }
  if (MEM_readLE32(header + 4) != kFrameContentsSize) {
    return 0;
  }
  return MEM_readLE32(header + 8);
}

View file

@ -0,0 +1,64 @@
/*
* Copyright (c) 2016-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
*/
#pragma once
#include "utils/Range.h"
#include <array>
#include <cstddef>
#include <cstdint>
#include <cstdio>
namespace pzstd {
/**
 * We put a skippable frame before each frame.
 * It contains a skippable frame magic number, the size of the skippable frame,
 * and the size of the next frame.
 * Each skippable frame is exactly 12 bytes in little endian format.
 * The first 8 bytes are for compatibility with the ZSTD format.
 * If we have N threads, the output will look like
 *
 * [0x184D2A50|4|size1] [frame1 of size size1]
 * [0x184D2A50|4|size2] [frame2 of size size2]
 * ...
 * [0x184D2A50|4|sizeN] [frameN of size sizeN]
 *
 * Each sizeX is 4 bytes.
 *
 * These skippable frames should allow us to skip through the compressed file
 * and only load at most N pages.
 */
class SkippableFrame {
public:
// Total size of the serialized skippable frame in bytes.
static constexpr std::size_t kSize = 12;
private:
// Size of the frame that follows this header.
std::uint32_t frameSize_;
// The serialized header.
std::array<std::uint8_t, kSize> data_;
static constexpr std::uint32_t kSkippableFrameMagicNumber = 0x184D2A50;
// Size of the payload after the 8-byte magic number + size prefix.
// Could be improved if the size fits in fewer bytes
static constexpr std::uint32_t kFrameContentsSize = kSize - 8;
public:
// Write the skippable frame to data_ in LE format.
explicit SkippableFrame(std::uint32_t size);
// Read the skippable frame from bytes in LE format.
// Returns the size of the next frame.
static std::size_t tryRead(ByteRange bytes);
// View of the serialized header bytes.
ByteRange data() const {
return {data_.data(), data_.size()};
}
// Size of the next frame.
std::size_t frameSize() const {
return frameSize_;
}
};
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 68 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 26 KiB

View file

@ -0,0 +1,27 @@
/*
* Copyright (c) 2016-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
*/
#include "ErrorHolder.h"
#include "Options.h"
#include "Pzstd.h"
using namespace pzstd;
// Entry point: parses the command line, then runs pzstd.
// Exits with 1 when parsing fails and 0 when parsing only printed a message;
// otherwise the exit code is whatever pzstdMain returns.
int main(int argc, const char** argv) {
  Options options;
  const auto status = options.parse(argc, argv);
  if (status == Options::Status::Failure) {
    return 1;
  }
  if (status == Options::Status::Message) {
    return 0;
  }
  return pzstdMain(options);
}

View file

@ -0,0 +1,37 @@
# Build rules for the pzstd tests.

# Tests for command line option parsing (OptionsTest.cpp).
cxx_test(
name='options_test',
srcs=['OptionsTest.cpp'],
deps=['//contrib/pzstd:options'],
)
# Tests built from PzstdTest.cpp; uses the round_trip helper library and datagen.
cxx_test(
name='pzstd_test',
srcs=['PzstdTest.cpp'],
deps=[
':round_trip',
'//contrib/pzstd:libpzstd',
'//contrib/pzstd/utils:scope_guard',
'//programs:datagen',
],
)
# Binary built from RoundTripTest.cpp (a cxx_binary, not a cxx_test).
cxx_binary(
name='round_trip_test',
srcs=['RoundTripTest.cpp'],
deps=[
':round_trip',
'//contrib/pzstd/utils:scope_guard',
'//programs:datagen',
]
)
# Header-only helper library exporting RoundTrip.h under the 'test' namespace.
cxx_library(
name='round_trip',
header_namespace='test',
exported_headers=['RoundTrip.h'],
deps=[
'//contrib/pzstd:libpzstd',
'//contrib/pzstd:options',
'//contrib/pzstd/utils:scope_guard',
]
)

View file

@ -0,0 +1,536 @@
/*
* Copyright (c) 2016-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
*/
#include "Options.h"
#include <array>
#include <gtest/gtest.h>
using namespace pzstd;
namespace pzstd {
// Field-by-field equality of two Options values, used by EXPECT_EQ.
bool operator==(const Options &lhs, const Options &rhs) {
  if (!(lhs.numThreads == rhs.numThreads)) return false;
  if (!(lhs.maxWindowLog == rhs.maxWindowLog)) return false;
  if (!(lhs.compressionLevel == rhs.compressionLevel)) return false;
  if (!(lhs.decompress == rhs.decompress)) return false;
  if (!(lhs.inputFiles == rhs.inputFiles)) return false;
  if (!(lhs.outputFile == rhs.outputFile)) return false;
  if (!(lhs.overwrite == rhs.overwrite)) return false;
  if (!(lhs.keepSource == rhs.keepSource)) return false;
  if (!(lhs.writeMode == rhs.writeMode)) return false;
  if (!(lhs.checksum == rhs.checksum)) return false;
  return lhs.verbosity == rhs.verbosity;
}
// Pretty-prints every field of `opt`, one per line, so that failed
// comparisons produce readable output.
std::ostream &operator<<(std::ostream &out, const Options &opt) {
  out << "{";
  out << "\n\tnumThreads: " << opt.numThreads;
  out << ",\n\tmaxWindowLog: " << opt.maxWindowLog;
  out << ",\n\tcompressionLevel: " << opt.compressionLevel;
  out << ",\n\tdecompress: " << opt.decompress;
  out << ",\n\tinputFiles: {";
  // The first entry has no leading comma; every later one does.
  const char *separator = "\n\t\t";
  for (const auto &name : opt.inputFiles) {
    out << separator << name;
    separator = ",\n\t\t";
  }
  out << "\n\t}";
  out << ",\n\toutputFile: " << opt.outputFile;
  out << ",\n\toverwrite: " << opt.overwrite;
  out << ",\n\tkeepSource: " << opt.keepSource;
  out << ",\n\twriteMode: " << static_cast<int>(opt.writeMode);
  out << ",\n\tchecksum: " << opt.checksum;
  out << ",\n\tverbosity: " << opt.verbosity;
  out << "\n}";
  return out;
}
}
namespace {
// Path used as the "discard output" file: "nul" when _WIN32 is defined,
// "/dev/null" otherwise.
#ifdef _WIN32
const char nullOutput[] = "nul";
#else
const char nullOutput[] = "/dev/null";
#endif
// Shorthand for the write mode used in the expected Options values below.
constexpr auto autoMode = Options::WriteMode::Auto;
} // anonymous namespace
// Expect the wrapped expression to equal the given Options::Status value.
#define EXPECT_SUCCESS(...) EXPECT_EQ(Options::Status::Success, __VA_ARGS__)
#define EXPECT_FAILURE(...) EXPECT_EQ(Options::Status::Failure, __VA_ARGS__)
#define EXPECT_MESSAGE(...) EXPECT_EQ(Options::Status::Message, __VA_ARGS__)
// Builds an argv-style array from `args`, with a nullptr placed in slot 0
// (presumably standing in for the program name -- confirm against
// Options::parse).
template <typename... Args>
std::array<const char *, sizeof...(Args) + 1> makeArray(Args... args) {
  using ArgArray = std::array<const char *, sizeof...(Args) + 1>;
  ArgArray argv = {{nullptr, args...}};
  return argv;
}
// Command lines that are expected to parse successfully. Where an `expected`
// value is given, the whole parsed Options is compared against it.
// NOTE(review): the aggregate initializers presumably follow the member
// declaration order of Options -- confirm against Options.h.
TEST(Options, ValidInputs) {
{
// Long form of the thread-count option, output file, input file, force.
Options options;
auto args = makeArray("--processes", "5", "-o", "x", "y", "-f");
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
Options expected = {5, 23, 3, false, {"y"}, "x",
true, true, autoMode, true, 2};
EXPECT_EQ(expected, options);
}
{
// Explicit compression level given as "-19".
Options options;
auto args = makeArray("-p", "1", "input", "-19");
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
Options expected = {1, 23, 19, false, {"input"}, "",
false, true, autoMode, true, 2};
EXPECT_EQ(expected, options);
}
{
// "--ultra" with level 22 while decompressing.
Options options;
auto args =
makeArray("--ultra", "-22", "-p", "1", "-o", "x", "-d", "x.zst", "-f");
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
Options expected = {1, 0, 22, true, {"x.zst"}, "x",
true, true, autoMode, true, 2};
EXPECT_EQ(expected, options);
}
{
// Long forms "--decompress" and "--force".
Options options;
auto args = makeArray("--processes", "100", "hello.zst", "--decompress",
"--force");
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
Options expected = {100, 23, 3, true, {"hello.zst"}, "", true,
true, autoMode, true, 2};
EXPECT_EQ(expected, options);
}
{
// Combined short options "-dp 1"; "-c" yields output file "-".
Options options;
auto args = makeArray("x", "-dp", "1", "-c");
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
Options expected = {1, 23, 3, true, {"x"}, "-",
false, true, autoMode, true, 2};
EXPECT_EQ(expected, options);
}
{
// "--stdout" is expected to give the same result as "-c" above.
Options options;
auto args = makeArray("x", "-dp", "1", "--stdout");
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
Options expected = {1, 23, 3, true, {"x"}, "-",
false, true, autoMode, true, 2};
EXPECT_EQ(expected, options);
}
{
// Combined "-fo -" plus "--ultra" and "-d" given after the input file.
Options options;
auto args = makeArray("-p", "1", "x", "-5", "-fo", "-", "--ultra", "-d");
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
Options expected = {1, 0, 5, true, {"x"}, "-",
true, true, autoMode, true, 2};
EXPECT_EQ(expected, options);
}
{
// Plain compression with an explicit output file.
Options options;
auto args = makeArray("silesia.tar", "-o", "silesia.tar.pzstd", "-p", "2");
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
Options expected = {2,
23,
3,
false,
{"silesia.tar"},
"silesia.tar.pzstd",
false,
true,
autoMode,
true,
2};
EXPECT_EQ(expected, options);
}
{
// Only the parse status is checked here.
Options options;
auto args = makeArray("x", "-p", "1");
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
}
{
// NOTE(review): identical to the previous case -- possibly a leftover.
Options options;
auto args = makeArray("x", "-p", "1");
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
}
}
// Checks the output file name that getOutputFile() is expected to return for
// the first input file after parsing.
TEST(Options, GetOutputFile) {
{
// Compressing "x" without "-o": expects "x.zst".
Options options;
auto args = makeArray("x");
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
EXPECT_EQ("x.zst", options.getOutputFile(options.inputFiles[0]));
}
{
// Explicit "-o <null device>" with two inputs: expects the null device.
Options options;
auto args = makeArray("x", "y", "-o", nullOutput);
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
EXPECT_EQ(nullOutput, options.getOutputFile(options.inputFiles[0]));
}
{
// Combined "-do <null device>" while decompressing.
Options options;
auto args = makeArray("x.zst", "-do", nullOutput);
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
EXPECT_EQ(nullOutput, options.getOutputFile(options.inputFiles[0]));
}
{
// Decompressing "x.zst" without "-o": expects "x".
Options options;
auto args = makeArray("x.zst", "-d");
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
EXPECT_EQ("x", options.getOutputFile(options.inputFiles[0]));
}
{
// Decompressing "xzst" (no ".zst" suffix): expects an empty name.
Options options;
auto args = makeArray("xzst", "-d");
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
EXPECT_EQ("", options.getOutputFile(options.inputFiles[0]));
}
{
// Output name attached directly to the option: "-doxx" expects "xx".
Options options;
auto args = makeArray("xzst", "-doxx");
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
EXPECT_EQ("xx", options.getOutputFile(options.inputFiles[0]));
}
}
// Parsing of command lines with more than one input file.
TEST(Options, MultipleFiles) {
{
// Three inputs and nothing else: expects defaults with verbosity 1.
Options options;
auto args = makeArray("x", "y", "z");
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
Options expected;
expected.inputFiles = {"x", "y", "z"};
expected.verbosity = 1;
EXPECT_EQ(expected, options);
}
{
// Several inputs with the null device as output are expected to parse.
Options options;
auto args = makeArray("x", "y", "z", "-o", nullOutput);
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
Options expected;
expected.inputFiles = {"x", "y", "z"};
expected.outputFile = nullOutput;
expected.verbosity = 1;
EXPECT_EQ(expected, options);
}
{
// Several inputs with output "-" are expected to be rejected.
Options options;
auto args = makeArray("x", "y", "-o-");
EXPECT_FAILURE(options.parse(args.size(), args.data()));
}
{
// Several inputs with one regular output file are expected to be rejected.
Options options;
auto args = makeArray("x", "y", "-o", "file");
EXPECT_FAILURE(options.parse(args.size(), args.data()));
}
{
// Arguments after "--" are expected to be taken as input file names.
Options options;
auto args = makeArray("-qqvd12qp4", "-f", "x", "--", "--rm", "-c");
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
Options expected = {4, 23, 12, true, {"x", "--rm", "-c"},
"", true, true, autoMode, true,
0};
EXPECT_EQ(expected, options);
}
}
// Validation of the thread-count option "-p".
TEST(Options, NumThreads) {
{
// No "-p" at all is expected to parse.
Options options;
auto args = makeArray("x", "-dfo", "-");
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
}
{
// A thread count of 0 is expected to be rejected.
Options options;
auto args = makeArray("x", "-p", "0", "-fo", "-");
EXPECT_FAILURE(options.parse(args.size(), args.data()));
}
{
// "-p" followed by another option instead of a number is expected to fail.
Options options;
auto args = makeArray("-f", "-p", "-o", "-");
EXPECT_FAILURE(options.parse(args.size(), args.data()));
}
}
// Compression levels that are expected to be rejected.
TEST(Options, BadCompressionLevel) {
{
// Level 20 without "--ultra".
Options options;
auto args = makeArray("x", "-20");
EXPECT_FAILURE(options.parse(args.size(), args.data()));
}
{
// Level 23 even with "--ultra".
Options options;
auto args = makeArray("x", "--ultra", "-23");
EXPECT_FAILURE(options.parse(args.size(), args.data()));
}
{
Options options;
auto args = makeArray("x", "--1"); // negative 1?
EXPECT_FAILURE(options.parse(args.size(), args.data()));
}
}
// The option "-x" is expected to be rejected.
TEST(Options, InvalidOption) {
{
Options options;
auto args = makeArray("x", "-x");
EXPECT_FAILURE(options.parse(args.size(), args.data()));
}
}
// Decompressing an input named "notzst" parses, but getOutputFile() is
// expected to return an empty name for it.
TEST(Options, BadOutputFile) {
{
Options options;
auto args = makeArray("notzst", "-d", "-p", "1");
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
EXPECT_EQ("", options.getOutputFile(options.inputFiles.front()));
}
}
// Options that take an argument, given a bad or missing one; all are
// expected to fail.
TEST(Options, BadOptionsWithArguments) {
{
// "-p" immediately followed by a non-numeric character.
Options options;
auto args = makeArray("x", "-pf");
EXPECT_FAILURE(options.parse(args.size(), args.data()));
}
{
// Thread count with trailing garbage.
Options options;
auto args = makeArray("x", "-p", "10f");
EXPECT_FAILURE(options.parse(args.size(), args.data()));
}
{
// "-p" as the last argument.
Options options;
auto args = makeArray("x", "-p");
EXPECT_FAILURE(options.parse(args.size(), args.data()));
}
{
// "-o" as the last argument.
Options options;
auto args = makeArray("x", "-o");
EXPECT_FAILURE(options.parse(args.size(), args.data()));
}
{
// NOTE(review): identical to the previous case -- possibly a leftover.
Options options;
auto args = makeArray("x", "-o");
EXPECT_FAILURE(options.parse(args.size(), args.data()));
}
}
// Interaction of "--rm" with "-k" / "--keep" and the default for keepSource.
TEST(Options, KeepSource) {
{
// "-k" after "--rm": keepSource is expected to be true.
Options options;
auto args = makeArray("x", "--rm", "-k");
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
EXPECT_EQ(true, options.keepSource);
}
{
// "--keep" after "--rm": keepSource is expected to be true.
Options options;
auto args = makeArray("x", "--rm", "--keep");
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
EXPECT_EQ(true, options.keepSource);
}
{
// Neither option given: keepSource is expected to be true.
Options options;
auto args = makeArray("x");
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
EXPECT_EQ(true, options.keepSource);
}
{
// "--rm" alone: keepSource is expected to be false.
Options options;
auto args = makeArray("x", "--rm");
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
EXPECT_EQ(false, options.keepSource);
}
}
// Expected verbosity for combinations of "-q", "-v" and the number of inputs.
TEST(Options, Verbosity) {
{
// One input, no flags: expects 2.
Options options;
auto args = makeArray("x");
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
EXPECT_EQ(2, options.verbosity);
}
{
// "--quiet" plus "-qq": expects -1.
Options options;
auto args = makeArray("--quiet", "-qq", "x");
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
EXPECT_EQ(-1, options.verbosity);
}
{
// Two inputs, no flags: expects 1.
Options options;
auto args = makeArray("x", "y");
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
EXPECT_EQ(1, options.verbosity);
}
{
// Same, with the inputs given after "--".
Options options;
auto args = makeArray("--", "x", "y");
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
EXPECT_EQ(1, options.verbosity);
}
{
// "-qv" with two inputs: expects 1.
Options options;
auto args = makeArray("-qv", "x", "y");
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
EXPECT_EQ(1, options.verbosity);
}
{
// "-v" with two inputs: expects 3.
Options options;
auto args = makeArray("-v", "x", "y");
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
EXPECT_EQ(3, options.verbosity);
}
{
// "-v" with one input: expects 3.
Options options;
auto args = makeArray("-v", "x");
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
EXPECT_EQ(3, options.verbosity);
}
}
// "-t" / "--test" is expected to select decompression to the null device while
// keeping the source file.
TEST(Options, TestMode) {
{
Options options;
auto args = makeArray("x", "-t");
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
EXPECT_EQ(true, options.keepSource);
EXPECT_EQ(true, options.decompress);
EXPECT_EQ(nullOutput, options.outputFile);
}
{
// "--rm" and "-ohello" given after "--test" are expected to have no effect.
Options options;
auto args = makeArray("x", "--test", "--rm", "-ohello");
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
EXPECT_EQ(true, options.keepSource);
EXPECT_EQ(true, options.decompress);
EXPECT_EQ(nullOutput, options.outputFile);
}
}
// Expected value of `checksum` for combinations of "--no-check", "--check"
// and "-C".
TEST(Options, Checksum) {
{
// "-C" given after "--no-check": expects true.
Options options;
auto args = makeArray("x.zst", "--no-check", "-Cd");
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
EXPECT_EQ(true, options.checksum);
}
{
// No checksum option: expects true.
Options options;
auto args = makeArray("x");
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
EXPECT_EQ(true, options.checksum);
}
{
// "--check" given after "--no-check": expects true.
Options options;
auto args = makeArray("x", "--no-check", "--check");
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
EXPECT_EQ(true, options.checksum);
}
{
// "--no-check" alone: expects false.
Options options;
auto args = makeArray("x", "--no-check");
EXPECT_SUCCESS(options.parse(args.size(), args.data()));
EXPECT_EQ(false, options.checksum);
}
}
// When no input file is named, the input and the output are both expected to
// be "-".
// NOTE(review): the first three cases do not check the status returned by
// parse() -- confirm whether that is intentional.
TEST(Options, InputFiles) {
{
Options options;
auto args = makeArray("-cd");
options.parse(args.size(), args.data());
EXPECT_EQ(1, options.inputFiles.size());
EXPECT_EQ("-", options.inputFiles[0]);
EXPECT_EQ("-", options.outputFile);
}
{
// No arguments at all.
Options options;
auto args = makeArray();
options.parse(args.size(), args.data());
EXPECT_EQ(1, options.inputFiles.size());
EXPECT_EQ("-", options.inputFiles[0]);
EXPECT_EQ("-", options.outputFile);
}
{
Options options;
auto args = makeArray("-d");
options.parse(args.size(), args.data());
EXPECT_EQ(1, options.inputFiles.size());
EXPECT_EQ("-", options.inputFiles[0]);
EXPECT_EQ("-", options.outputFile);
}
{
// A named file together with "-" is expected to be rejected.
Options options;
auto args = makeArray("x", "-");
EXPECT_FAILURE(options.parse(args.size(), args.data()));
}
}
// Malformed option strings; each is expected to be rejected.
TEST(Options, InvalidOptions) {
{
Options options;
auto args = makeArray("-ibasdf");
EXPECT_FAILURE(options.parse(args.size(), args.data()));
}
{
// A dash followed by a space.
Options options;
auto args = makeArray("- ");
EXPECT_FAILURE(options.parse(args.size(), args.data()));
}
{
Options options;
auto args = makeArray("-n15");
EXPECT_FAILURE(options.parse(args.size(), args.data()));
}
{
// Compression level 0.
Options options;
auto args = makeArray("-0", "x");
EXPECT_FAILURE(options.parse(args.size(), args.data()));
}
}
// Help and version options are expected to yield Options::Status::Message.
TEST(Options, Extras) {
{
Options options;
auto args = makeArray("-h");
EXPECT_MESSAGE(options.parse(args.size(), args.data()));
}
{
Options options;
auto args = makeArray("-H");
EXPECT_MESSAGE(options.parse(args.size(), args.data()));
}
{
Options options;
auto args = makeArray("-V");
EXPECT_MESSAGE(options.parse(args.size(), args.data()));
}
{
Options options;
auto args = makeArray("--help");
EXPECT_MESSAGE(options.parse(args.size(), args.data()));
}
{
Options options;
auto args = makeArray("--version");
EXPECT_MESSAGE(options.parse(args.size(), args.data()));
}
}

View file

@ -0,0 +1,149 @@
/*
* Copyright (c) 2016-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
*/
#include "Pzstd.h"
extern "C" {
#include "datagen.h"
}
#include "test/RoundTrip.h"
#include "utils/ScopeGuard.h"
#include <cstddef>
#include <cstdio>
#include <gtest/gtest.h>
#include <memory>
#include <random>
using namespace std;
using namespace pzstd;
// Round-trips generated inputs of every length from 1 to 255 bytes with one
// and two threads at compression levels 1 and 4. The seed is printed so that
// a failing run can be reproduced.
TEST(Pzstd, SmallSizes) {
  unsigned seed = std::random_device{}();
  std::fprintf(stderr, "Pzstd.SmallSizes seed: %u\n", seed);
  std::mt19937 gen(seed);

  for (unsigned len = 1; len < 256; ++len) {
    if (len % 16 == 0) {
      std::fprintf(stderr, "%u / 16\n", len / 16);
    }
    std::string inputFile = std::tmpnam(nullptr);
    // Best-effort removal of the temporary input on every exit path.
    auto guard = makeScopeGuard([&] { std::remove(inputFile.c_str()); });
    {
      static uint8_t buf[256];
      RDG_genBuffer(buf, len, 0.5, 0.0, gen());
      auto fd = std::fopen(inputFile.c_str(), "wb");
      // Passing a null stream to fwrite()/fclose() is undefined behaviour, so
      // fail the test cleanly if the temporary file could not be created.
      ASSERT_TRUE(fd != nullptr);
      auto written = std::fwrite(buf, 1, len, fd);
      std::fclose(fd);
      ASSERT_EQ(written, len);
    }
    for (unsigned numThreads = 1; numThreads <= 2; ++numThreads) {
      // level takes the values 1 and 4.
      for (unsigned level = 1; level <= 4; level *= 4) {
        // Prints the failing configuration unless dismissed below.
        auto errorGuard = makeScopeGuard([&] {
          std::fprintf(stderr, "# threads: %u\n", numThreads);
          std::fprintf(stderr, "compression level: %u\n", level);
        });
        Options options;
        options.overwrite = true;
        options.inputFiles = {inputFile};
        options.numThreads = numThreads;
        options.compressionLevel = level;
        options.verbosity = 1;
        ASSERT_TRUE(roundTrip(options));
        errorGuard.dismiss();
      }
    }
  }
}
// Round-trips generated inputs whose size doubles from 2^20 to 2^24 bytes,
// with requested thread counts 1, 4 and 16 (capped at the default held by a
// fresh `Options`) at compression levels 1 and 4.
TEST(Pzstd, LargeSizes) {
  unsigned seed = std::random_device{}();
  std::fprintf(stderr, "Pzstd.LargeSizes seed: %u\n", seed);
  std::mt19937 gen(seed);

  for (unsigned len = 1 << 20; len <= (1 << 24); len *= 2) {
    std::string inputFile = std::tmpnam(nullptr);
    // Best-effort removal of the temporary input on every exit path.
    auto guard = makeScopeGuard([&] { std::remove(inputFile.c_str()); });
    {
      std::unique_ptr<uint8_t[]> buf(new uint8_t[len]);
      RDG_genBuffer(buf.get(), len, 0.5, 0.0, gen());
      auto fd = std::fopen(inputFile.c_str(), "wb");
      // Passing a null stream to fwrite()/fclose() is undefined behaviour, so
      // fail the test cleanly if the temporary file could not be created.
      ASSERT_TRUE(fd != nullptr);
      auto written = std::fwrite(buf.get(), 1, len, fd);
      std::fclose(fd);
      ASSERT_EQ(written, len);
    }
    for (unsigned numThreads = 1; numThreads <= 16; numThreads *= 4) {
      // level takes the values 1 and 4.
      for (unsigned level = 1; level <= 4; level *= 4) {
        // Prints the failing configuration unless dismissed below.
        auto errorGuard = makeScopeGuard([&] {
          std::fprintf(stderr, "# threads: %u\n", numThreads);
          std::fprintf(stderr, "compression level: %u\n", level);
        });
        Options options;
        options.overwrite = true;
        options.inputFiles = {inputFile};
        // Never ask for more threads than the default chosen by Options.
        options.numThreads = std::min(numThreads, options.numThreads);
        options.compressionLevel = level;
        options.verbosity = 1;
        ASSERT_TRUE(roundTrip(options));
        errorGuard.dismiss();
      }
    }
  }
}
// Round-trips a file of 4 GiB + 64 MiB at compression level 1. The test is
// skipped (it returns without failing) when the input file cannot be created
// or fully written.
TEST(Pzstd, DISABLED_ExtremelyLargeSize) {
  unsigned seed = std::random_device{}();
  std::fprintf(stderr, "Pzstd.ExtremelyLargeSize seed: %u\n", seed);
  std::mt19937 gen(seed);

  std::string inputFile = std::tmpnam(nullptr);
  // Best-effort removal of the temporary input on every exit path.
  auto guard = makeScopeGuard([&] { std::remove(inputFile.c_str()); });

  {
    // Write 4GB + 64 MB: 65 chunks of 64 MiB each.
    constexpr size_t kLength = 1 << 26;
    std::unique_ptr<uint8_t[]> buf(new uint8_t[kLength]);
    auto fd = std::fopen(inputFile.c_str(), "wb");
    // Neither fwrite() nor fclose() may be handed a null stream; treat a
    // failed open like a failed write and skip.
    if (fd == nullptr) {
      std::fprintf(stderr, "Failed to open file, skipping test\n");
      return;
    }
    auto closeGuard = makeScopeGuard([&] { std::fclose(fd); });
    for (size_t i = 0; i < (1 << 6) + 1; ++i) {
      RDG_genBuffer(buf.get(), kLength, 0.5, 0.0, gen());
      auto written = std::fwrite(buf.get(), 1, kLength, fd);
      if (written != kLength) {
        std::fprintf(stderr, "Failed to write file, skipping test\n");
        return;
      }
    }
  }

  Options options;
  options.overwrite = true;
  options.inputFiles = {inputFile};
  options.compressionLevel = 1;
  // Make sure at least one thread is requested.
  if (options.numThreads == 0) {
    options.numThreads = 1;
  }
  ASSERT_TRUE(roundTrip(options));
}
// Round-trips 10000 copies of the byte 'a' with a single thread at
// compression level 1.
TEST(Pzstd, ExtremelyCompressible) {
  std::string inputFile = std::tmpnam(nullptr);
  // Best-effort removal of the temporary input on every exit path.
  auto guard = makeScopeGuard([&] { std::remove(inputFile.c_str()); });
  {
    std::unique_ptr<uint8_t[]> buf(new uint8_t[10000]);
    std::memset(buf.get(), 'a', 10000);
    auto fd = std::fopen(inputFile.c_str(), "wb");
    // Passing a null stream to fwrite()/fclose() is undefined behaviour, so
    // fail the test cleanly if the temporary file could not be created.
    ASSERT_TRUE(fd != nullptr);
    auto written = std::fwrite(buf.get(), 1, 10000, fd);
    std::fclose(fd);
    ASSERT_EQ(written, 10000);
  }
  Options options;
  options.overwrite = true;
  options.inputFiles = {inputFile};
  options.numThreads = 1;
  options.compressionLevel = 1;
  ASSERT_TRUE(roundTrip(options));
}

View file

@ -0,0 +1,86 @@
/*
* Copyright (c) 2016-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
*/
#pragma once
#include "Options.h"
#include "Pzstd.h"
#include "utils/ScopeGuard.h"
#include <cstdio>
#include <string>
#include <cstdint>
#include <memory>
namespace pzstd {
/**
 * Compares two files byte for byte.
 *
 * @param source        Path of the reference file.
 * @param decompressed  Path of the file to compare against `source`.
 * @returns True iff both files could be opened and read without error and
 *          have identical contents. False on any open/read error or on the
 *          first difference in length or content.
 */
inline bool check(std::string source, std::string decompressed) {
  std::unique_ptr<std::uint8_t[]> sBuf(new std::uint8_t[1024]);
  std::unique_ptr<std::uint8_t[]> dBuf(new std::uint8_t[1024]);

  auto sFd = std::fopen(source.c_str(), "rb");
  auto dFd = std::fopen(decompressed.c_str(), "rb");
  // Close whichever streams were actually opened; fclose(nullptr) is
  // undefined behaviour.
  auto guard = makeScopeGuard([&] {
    if (sFd != nullptr) {
      std::fclose(sFd);
    }
    if (dFd != nullptr) {
      std::fclose(dFd);
    }
  });
  // A file that cannot be opened can never compare equal.
  if (sFd == nullptr || dFd == nullptr) {
    return false;
  }

  size_t sRead, dRead;

  do {
    sRead = std::fread(sBuf.get(), 1, 1024, sFd);
    dRead = std::fread(dBuf.get(), 1, 1024, dFd);
    if (std::ferror(sFd) || std::ferror(dFd)) {
      return false;
    }
    if (sRead != dRead) {
      return false;
    }

    for (size_t i = 0; i < sRead; ++i) {
      if (sBuf.get()[i] != dBuf.get()[i]) {
        return false;
      }
    }
  } while (sRead == 1024);  // a short read means the end of the file was hit
  // Both streams must be at end-of-file, otherwise one file is longer.
  if (!std::feof(sFd) || !std::feof(dFd)) {
    return false;
  }
  return true;
}
/**
 * Compresses the single input file named in `options`, decompresses the
 * result, and compares it with the original.
 *
 * `options` is modified: `outputFile`, `decompress` and the first entry of
 * `inputFiles` are overwritten and not restored. The two temporary files are
 * removed before returning.
 *
 * @returns True iff exactly one input file was given, both `pzstdMain()`
 *          calls returned 0, and `check()` found the files identical.
 */
inline bool roundTrip(Options& options) {
  if (options.inputFiles.size() != 1) {
    return false;
  }

  const std::string original = options.inputFiles.front();
  const std::string compressedPath = std::tmpnam(nullptr);
  const std::string decompressedPath = std::tmpnam(nullptr);
  auto cleanup = makeScopeGuard([&] {
    std::remove(compressedPath.c_str());
    std::remove(decompressedPath.c_str());
  });

  // Pass 1: original -> compressed.
  options.outputFile = compressedPath;
  options.decompress = false;
  const bool compressedOk = (pzstdMain(options) == 0);
  if (!compressedOk) {
    return false;
  }

  // Pass 2: compressed -> decompressed.
  options.decompress = true;
  options.inputFiles.front() = compressedPath;
  options.outputFile = decompressedPath;
  const bool decompressedOk = (pzstdMain(options) == 0);
  if (!decompressedOk) {
    return false;
  }

  return check(original, decompressedPath);
}
}

View file

@ -0,0 +1,86 @@
/*
* Copyright (c) 2016-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
*/
extern "C" {
#include "datagen.h"
}
#include "Options.h"
#include "test/RoundTrip.h"
#include "utils/ScopeGuard.h"
#include <cstddef>
#include <cstdio>
#include <cstdlib>
#include <memory>
#include <random>
using namespace std;
using namespace pzstd;
namespace {
/**
 * Fills a buffer of `size` bytes with `RDG_genBuffer()` and writes it to a
 * new temporary file.
 *
 * @param size        Number of bytes to generate and write.
 * @param matchProba  Forwarded to `RDG_genBuffer()`.
 * @param litProba    Forwarded to `RDG_genBuffer()`.
 * @param seed        Forwarded to `RDG_genBuffer()`.
 * @returns The name of the file that was written. The caller is responsible
 *          for removing it. Aborts the process if the file cannot be opened
 *          or fully written.
 */
string
writeData(size_t size, double matchProba, double litProba, unsigned seed) {
  std::unique_ptr<uint8_t[]> buf(new uint8_t[size]);
  RDG_genBuffer(buf.get(), size, matchProba, litProba, seed);
  string file = tmpnam(nullptr);
  auto fd = std::fopen(file.c_str(), "wb");
  // fwrite()/fclose() on a null stream is undefined behaviour; treat a failed
  // open exactly like a failed write.
  if (fd == nullptr) {
    std::abort();
  }
  auto guard = makeScopeGuard([&] { std::fclose(fd); });
  auto bytesWritten = std::fwrite(buf.get(), 1, size, fd);
  if (bytesWritten != size) {
    std::abort();
  }
  return file;
}
/// Writes a generated input file and returns its name. The size is drawn
/// uniformly from [1, 2^16] bytes and both probabilities passed to
/// `writeData()` uniformly from [0, 1).
template <typename Generator>
string generateInputFile(Generator& gen) {
  std::uniform_real_distribution<> probabilityDist{0, 1};
  std::uniform_int_distribution<size_t> sizeDist{1, 1 << 16};
  // Kept as a single call expression so that the draws from `gen` happen in
  // whatever argument order the compiler already chose.
  return writeData(
      sizeDist(gen), probabilityDist(gen), probabilityDist(gen), gen());
}
/// Builds `Options` for `inputFile` with overwrite enabled, a thread count
/// drawn from [1, 32] and a compression level drawn from [1, 10]. The thread
/// count is drawn before the level.
template <typename Generator>
Options generateOptions(Generator& gen, const string& inputFile) {
  std::uniform_int_distribution<unsigned> threadDist{1, 32};
  std::uniform_int_distribution<unsigned> levelDist{1, 10};

  Options result;
  result.inputFiles = {inputFile};
  result.overwrite = true;
  result.numThreads = threadDist(gen);
  result.compressionLevel = levelDist(gen);
  return result;
}
}
// Stress driver: generates 10000 input files and round-trips each one with
// 10 sets of generated options. On the first failure it prints the offending
// configuration and exits with status 1; otherwise it exits with 0.
int main() {
  std::mt19937 gen(std::random_device{}());

  // Terminates the "\r"-rewritten progress line on every exit path.
  auto newlineGuard = makeScopeGuard([] { std::fprintf(stderr, "\n"); });

  for (unsigned fileIndex = 0; fileIndex < 10000; ++fileIndex) {
    if (fileIndex % 100 == 0) {
      std::fprintf(stderr, "Progress: %u%%\r", fileIndex / 100);
    }
    auto inputFile = generateInputFile(gen);
    auto inputGuard = makeScopeGuard([&] { std::remove(inputFile.c_str()); });

    for (unsigned attempt = 0; attempt < 10; ++attempt) {
      auto options = generateOptions(gen, inputFile);
      if (roundTrip(options)) {
        continue;
      }
      std::fprintf(stderr, "numThreads: %u\n", options.numThreads);
      std::fprintf(stderr, "level: %u\n", options.compressionLevel);
      std::fprintf(stderr, "decompress? %u\n", (unsigned)options.decompress);
      std::fprintf(stderr, "file: %s\n", inputFile.c_str());
      return 1;
    }
  }
  return 0;
}

View file

@ -0,0 +1,75 @@
cxx_library(
name='buffer',
visibility=['PUBLIC'],
header_namespace='utils',
exported_headers=['Buffer.h'],
deps=[':range'],
)
cxx_library(
name='file_system',
visibility=['PUBLIC'],
header_namespace='utils',
exported_headers=['FileSystem.h'],
deps=[':range'],
)
cxx_library(
name='likely',
visibility=['PUBLIC'],
header_namespace='utils',
exported_headers=['Likely.h'],
)
cxx_library(
name='range',
visibility=['PUBLIC'],
header_namespace='utils',
exported_headers=['Range.h'],
deps=[':likely'],
)
cxx_library(
name='resource_pool',
visibility=['PUBLIC'],
header_namespace='utils',
exported_headers=['ResourcePool.h'],
)
cxx_library(
name='scope_guard',
visibility=['PUBLIC'],
header_namespace='utils',
exported_headers=['ScopeGuard.h'],
)
cxx_library(
name='thread_pool',
visibility=['PUBLIC'],
header_namespace='utils',
exported_headers=['ThreadPool.h'],
deps=[':work_queue'],
)
cxx_library(
name='work_queue',
visibility=['PUBLIC'],
header_namespace='utils',
exported_headers=['WorkQueue.h'],
deps=[':buffer'],
)
cxx_library(
name='utils',
visibility=['PUBLIC'],
deps=[
':buffer',
':file_system',
':likely',
':range',
':resource_pool',
':scope_guard',
':thread_pool',
':work_queue',
],
)

View file

@ -0,0 +1,99 @@
/*
* Copyright (c) 2016-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
*/
#pragma once
#include "utils/Range.h"
#include <array>
#include <cstddef>
#include <memory>
namespace pzstd {
/**
 * A `Buffer` has a pointer to a shared buffer, and a range of the buffer that
 * it owns.
 * The idea is that you can allocate one buffer, and write chunks into it
 * and break off those chunks.
 * The underlying buffer is reference counted, and will be destroyed when all
 * `Buffer`s that reference it are destroyed.
 *
 * `Buffer` is move-only (the move operations are defaulted and no copy
 * operations are declared). The defaulted move transfers the shared pointer
 * but only copies the range's two raw pointers, so a moved-from `Buffer`
 * still reports its old range while no longer keeping the storage alive; do
 * not read through it.
 */
class Buffer {
// Shared ownership of the whole allocation.
std::shared_ptr<unsigned char> buffer_;
// The part of the allocation that this `Buffer` refers to.
MutableByteRange range_;
// Deleter for `buffer_`: the storage is allocated with `new[]`.
static void delete_buffer(unsigned char* buffer) {
delete[] buffer;
}
public:
/// Construct an empty buffer that owns no data.
explicit Buffer() {}
/// Construct a `Buffer` that owns a new underlying buffer of size `size`.
explicit Buffer(std::size_t size)
: buffer_(new unsigned char[size], delete_buffer),
range_(buffer_.get(), buffer_.get() + size) {}
/// Construct a `Buffer` that shares ownership of `buffer` and refers to
/// `data`. No check is made that `data` lies inside `buffer`.
explicit Buffer(std::shared_ptr<unsigned char> buffer, MutableByteRange data)
: buffer_(buffer), range_(data) {}
Buffer(Buffer&&) = default;
Buffer& operator=(Buffer&&) & = default;
/**
* Splits the data into two pieces: [begin, begin + n), [begin + n, end).
* Their data both points into the same underlying buffer.
* Modifies the original `Buffer` to point to only [begin + n, end).
*
* @param n The offset to split at.
* @returns A buffer that owns the data [begin, begin + n).
* @throws std::out_of_range if `n` is larger than `size()` (raised by the
* underlying `Range`).
*/
Buffer splitAt(std::size_t n) {
auto firstPiece = range_.subpiece(0, n);
range_.advance(n);
return Buffer(buffer_, firstPiece);
}
/// Modifies the buffer to point to the range [begin + n, end).
void advance(std::size_t n) {
range_.advance(n);
}
/// Modifies the buffer to point to the range [begin, end - n).
void subtract(std::size_t n) {
range_.subtract(n);
}
/// Returns a read only `Range` pointing to the `Buffer`s data.
ByteRange range() const {
return range_;
}
/// Returns a mutable `Range` pointing to the `Buffer`s data.
MutableByteRange range() {
return range_;
}
/// Returns a read only pointer to the first byte of the range.
const unsigned char* data() const {
return range_.data();
}
/// Returns a mutable pointer to the first byte of the range.
unsigned char* data() {
return range_.data();
}
/// Returns the number of bytes in the range.
std::size_t size() const {
return range_.size();
}
/// Returns true iff the range contains no bytes.
bool empty() const {
return range_.empty();
}
};
}

View file

@ -0,0 +1,94 @@
/*
* Copyright (c) 2016-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
*/
#pragma once
#include "utils/Range.h"
#include <sys/stat.h>
#include <cerrno>
#include <cstdint>
#include <system_error>
// A small subset of `std::filesystem`.
// `std::filesystem` should be a drop in replacement.
// See http://en.cppreference.com/w/cpp/filesystem for documentation.
namespace pzstd {
// using file_status = ... causes gcc to emit a false positive warning
#if defined(_MSC_VER)
typedef struct ::_stat64 file_status;
#else
typedef struct ::stat file_status;
#endif
/// Queries the file status of `path` with `stat()` (`_stat64()` on MSVC).
/// See http://en.cppreference.com/w/cpp/filesystem/status
///
/// `path.data()` is handed to the C API as is, so it must point at a
/// NUL-terminated string.
///
/// @param path  The path to query.
/// @param ec    Set to `errno` (generic category) on failure, cleared on
///              success.
/// @returns The status filled in by the system call. On failure the returned
///          structure is zero-initialised rather than indeterminate, so
///          callers that inspect it without first checking `ec` (as the
///          path overloads of `is_regular_file()` and `is_directory()` do)
///          read well-defined values.
inline file_status status(StringPiece path, std::error_code& ec) noexcept {
  file_status status{};
#if defined(_MSC_VER)
  const auto error = ::_stat64(path.data(), &status);
#else
  const auto error = ::stat(path.data(), &status);
#endif
  if (error) {
    ec.assign(errno, std::generic_category());
  } else {
    ec.clear();
  }
  return status;
}
/// Reports whether `status` describes a regular file.
/// See http://en.cppreference.com/w/cpp/filesystem/is_regular_file
inline bool is_regular_file(file_status status) noexcept {
#if defined(S_ISREG)
  return S_ISREG(status.st_mode);
#elif defined(S_IFMT) && defined(S_IFREG)
  // No S_ISREG() macro available: compare the file-type bits directly.
  return (status.st_mode & S_IFMT) == S_IFREG;
#else
  static_assert(false, "No POSIX stat() support.");
#endif
}

/// Reports whether `path` names a regular file; `ec` is set by `status()`.
/// See http://en.cppreference.com/w/cpp/filesystem/is_regular_file
inline bool is_regular_file(StringPiece path, std::error_code& ec) noexcept {
  const file_status info = status(path, ec);
  return is_regular_file(info);
}
/// Reports whether `status` describes a directory.
/// See http://en.cppreference.com/w/cpp/filesystem/is_directory
inline bool is_directory(file_status status) noexcept {
#if defined(S_ISDIR)
  return S_ISDIR(status.st_mode);
#elif defined(S_IFMT) && defined(S_IFDIR)
  // No S_ISDIR() macro available: compare the file-type bits directly.
  return (status.st_mode & S_IFMT) == S_IFDIR;
#else
  static_assert(false, "NO POSIX stat() support.");
#endif
}

/// Reports whether `path` names a directory; `ec` is set by `status()`.
/// See http://en.cppreference.com/w/cpp/filesystem/is_directory
inline bool is_directory(StringPiece path, std::error_code& ec) noexcept {
  const file_status info = status(path, ec);
  return is_directory(info);
}
/// Returns the size in bytes of the regular file at `path`.
/// See http://en.cppreference.com/w/cpp/filesystem/file_size
///
/// @param path  The file to query.
/// @param ec    Cleared on success. Set by `status()` if the query fails, or
///              to `ENOTSUP` if `path` is not a regular file.
/// @returns The file size, or `static_cast<std::uintmax_t>(-1)` on error.
inline std::uintmax_t file_size(
    StringPiece path,
    std::error_code& ec) noexcept {
  const auto failure = static_cast<std::uintmax_t>(-1);

  const auto info = status(path, ec);
  if (ec) {
    return failure;
  }
  if (is_regular_file(info)) {
    ec.clear();
    return info.st_size;
  }
  ec.assign(ENOTSUP, std::generic_category());
  return failure;
}
}

View file

@ -0,0 +1,28 @@
/*
* Copyright (c) 2016-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
*/
/**
* Compiler hints to indicate the fast path of an "if" branch: whether
* the if condition is likely to be true or false.
*
* @author Tudor Bosman (tudorb@fb.com)
*/
#pragma once
/* Drop any earlier definitions so the ones below always win. */
#ifdef LIKELY
#undef LIKELY
#endif
#ifdef UNLIKELY
#undef UNLIKELY
#endif

#if !defined(__GNUC__) || __GNUC__ < 4
/* No __builtin_expect available: the macros are plain pass-throughs. */
#define LIKELY(x) (x)
#define UNLIKELY(x) (x)
#else
/* GCC-compatible compilers: tell the optimiser which outcome to expect. */
#define LIKELY(x) (__builtin_expect((x), 1))
#define UNLIKELY(x) (__builtin_expect((x), 0))
#endif

View file

@ -0,0 +1,131 @@
/*
* Copyright (c) 2016-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
*/
/**
* A subset of `folly/Range.h`.
* All code copied verbatim modulo formatting
*/
#pragma once
#include "utils/Likely.h"
#include <cstddef>
#include <cstring>
#include <stdexcept>
#include <string>
#include <type_traits>
namespace pzstd {
namespace detail {
/*
 * SFINAE helper used to enable `Range` constructors for character pointers.
 * Use IsCharPointer<T>::type to enable const char* or char*.
 * Use IsCharPointer<T>::const_type to enable only const char*.
 * For every other T neither member exists, which removes the overload.
 */
template <class T>
struct IsCharPointer {};

template <>
struct IsCharPointer<char*> {
  using type = int;
};

template <>
struct IsCharPointer<const char*> {
  using const_type = int;
  using type = int;
};

} // namespace detail
/**
 * A non-owning view of the half-open range [b_, e_) described by two
 * iterators. The range never owns or copies the elements it refers to.
 *
 * NOTE(review): this class uses `std::min` and `std::iterator_traits`, but the
 * include list visible above it names neither <algorithm> nor <iterator>; it
 * appears to rely on them being pulled in transitively -- confirm.
 */
template <typename Iter>
class Range {
// First element of the range.
Iter b_;
// One past the last element of the range.
Iter e_;
public:
using size_type = std::size_t;
using iterator = Iter;
using const_iterator = Iter;
using value_type = typename std::remove_reference<
typename std::iterator_traits<Iter>::reference>::type;
using reference = typename std::iterator_traits<Iter>::reference;
/// Constructs an empty range from value-initialised iterators.
constexpr Range() : b_(), e_() {}
/// Constructs the range [begin, end).
constexpr Range(Iter begin, Iter end) : b_(begin), e_(end) {}
/// Constructs the range [begin, begin + size).
constexpr Range(Iter begin, size_type size) : b_(begin), e_(begin + size) {}
// Only enabled when Iter is `char*` or `const char*`: views a NUL-terminated
// string, excluding the terminator.
template <class T = Iter, typename detail::IsCharPointer<T>::type = 0>
/* implicit */ Range(Iter str) : b_(str), e_(str + std::strlen(str)) {}
// Only enabled when Iter is `const char*`: views the contents of `str`,
// which must outlive the range.
template <class T = Iter, typename detail::IsCharPointer<T>::const_type = 0>
/* implicit */ Range(const std::string& str)
: b_(str.data()), e_(b_ + str.size()) {}
// Allow implicit conversion from Range<From> to Range<To> if From is
// implicitly convertible to To.
template <
class OtherIter,
typename std::enable_if<
(!std::is_same<Iter, OtherIter>::value &&
std::is_convertible<OtherIter, Iter>::value),
int>::type = 0>
constexpr /* implicit */ Range(const Range<OtherIter>& other)
: b_(other.begin()), e_(other.end()) {}
Range(const Range&) = default;
Range(Range&&) = default;
// The assignment operators are lvalue-ref-qualified, so a temporary range
// cannot be assigned to.
Range& operator=(const Range&) & = default;
Range& operator=(Range&&) & = default;
/// Returns the number of elements in the range.
constexpr size_type size() const {
return e_ - b_;
}
/// Returns true iff the range contains no elements.
bool empty() const {
return b_ == e_;
}
/// Returns the iterator to the first element (same as `begin()`).
Iter data() const {
return b_;
}
Iter begin() const {
return b_;
}
Iter end() const {
return e_;
}
/// Drops the first `n` elements.
/// @throws std::out_of_range if `n > size()`.
void advance(size_type n) {
if (UNLIKELY(n > size())) {
throw std::out_of_range("index out of range");
}
b_ += n;
}
/// Drops the last `n` elements.
/// @throws std::out_of_range if `n > size()`.
void subtract(size_type n) {
if (UNLIKELY(n > size())) {
throw std::out_of_range("index out of range");
}
e_ -= n;
}
/// Returns the sub-range starting at offset `first` with at most `length`
/// elements; `length` is clamped to the elements remaining after `first`.
/// @throws std::out_of_range if `first > size()`.
Range subpiece(size_type first, size_type length = std::string::npos) const {
if (UNLIKELY(first > size())) {
throw std::out_of_range("index out of range");
}
return Range(b_ + first, std::min(length, size() - first));
}
};
using ByteRange = Range<const unsigned char*>;
using MutableByteRange = Range<unsigned char*>;
using StringPiece = Range<const char*>;
}

View file

@ -0,0 +1,96 @@
/*
* Copyright (c) 2016-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
*/
#pragma once
#include <cassert>
#include <functional>
#include <memory>
#include <mutex>
#include <vector>
namespace pzstd {
/**
 * An unbounded pool of resources.
 * A `ResourcePool<T>` requires a factory function that allocates a `T*` and
 * a free function that frees a `T*`.
 * Calling `ResourcePool::get()` will give you a new `ResourcePool::UniquePtr`
 * to a `T`, and when it goes out of scope the resource will be returned to the
 * pool.
 * The `ResourcePool<T>` *must* survive longer than any resources it hands out.
 * Remember that `ResourcePool<T>` hands out mutable `T`s, so make sure to clean
 * up the resource before or after every use.
 */
template <typename T>
class ResourcePool {
 public:
  class Deleter;
  using Factory = std::function<T*()>;
  using Free = std::function<void(T*)>;
  using UniquePtr = std::unique_ptr<T, Deleter>;

 private:
  // Guards `resources_` and `inUse_`.
  std::mutex mutex_;
  Factory factory_;
  Free free_;
  // Idle resources, reused in LIFO order.
  std::vector<T*> resources_;
  // Number of non-null resources currently handed out.
  unsigned inUse_;

 public:
  /**
   * Creates a `ResourcePool`.
   *
   * @param factory  The function to use to create new resources.
   * @param free     The function to use to free resources created by `factory`.
   */
  ResourcePool(Factory factory, Free free)
      : factory_(std::move(factory)), free_(std::move(free)), inUse_(0) {}

  /**
   * @returns  A unique pointer to a resource.  The resource is null iff
   *           there are no available resources and `factory()` returns null.
   */
  UniquePtr get() {
    std::lock_guard<std::mutex> lock(mutex_);
    if (!resources_.empty()) {
      UniquePtr resource{resources_.back(), Deleter{*this}};
      resources_.pop_back();
      ++inUse_;
      return resource;
    }
    UniquePtr resource{factory_(), Deleter{*this}};
    // A null unique_ptr never invokes its deleter, so a null resource would
    // never be counted back in. Only count resources that really exist,
    // otherwise the destructor's `inUse_ == 0` check fails after a failed
    // allocation.
    if (resource) {
      ++inUse_;
    }
    return resource;
  }

  /// Frees every idle resource. All handed-out resources must already have
  /// been returned.
  ~ResourcePool() noexcept {
    assert(inUse_ == 0);
    for (const auto resource : resources_) {
      free_(resource);
    }
  }

  /// Deleter used by `UniquePtr`: returns the resource to its pool instead of
  /// freeing it.
  class Deleter {
    ResourcePool *pool_;
  public:
    explicit Deleter(ResourcePool &pool) : pool_(&pool) {}

    void operator() (T *resource) {
      std::lock_guard<std::mutex> lock(pool_->mutex_);
      // Null resources are neither pooled nor counted (see `get()`).
      if (resource == nullptr) {
        return;
      }
      pool_->resources_.push_back(resource);
      assert(pool_->inUse_ > 0);
      --pool_->inUse_;
    }
  };
};
}

View file

@ -0,0 +1,50 @@
/*
* Copyright (c) 2016-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
*/
#pragma once
#include <utility>
namespace pzstd {
/**
 * Dismissable scope guard.
 * `Function` must be callable and take no parameters.
 * Unless `dismiss()` is called, the callable is executed upon destruction of
 * `ScopeGuard`.
 *
 * A guard can be moved but not copied. Moving transfers the responsibility
 * for running the callable to the new guard, so the callable runs at most
 * once even when the move out of `makeScopeGuard()` is not elided.
 *
 * Example:
 *
 *   auto guard = makeScopeGuard([&] { cleanup(); });
 */
template <typename Function>
class ScopeGuard {
  Function function;
  bool dismissed;

 public:
  explicit ScopeGuard(Function&& function)
      : function(std::move(function)), dismissed(false) {}

  /// Takes over `other`'s callable and dismisses `other`.
  ScopeGuard(ScopeGuard&& other)
      : function(std::move(other.function)), dismissed(other.dismissed) {
    other.dismissed = true;
  }

  // Copying would run the callable once per copy.
  ScopeGuard(const ScopeGuard&) = delete;
  ScopeGuard& operator=(const ScopeGuard&) = delete;
  ScopeGuard& operator=(ScopeGuard&&) = delete;

  /// Prevents the callable from running when the guard is destroyed.
  void dismiss() {
    dismissed = true;
  }

  ~ScopeGuard() noexcept {
    if (!dismissed) {
      function();
    }
  }
};
/// Wraps `function` in a `ScopeGuard` that runs it when the returned guard is
/// destroyed, unless the guard has been dismissed first.
template <typename Function>
ScopeGuard<Function> makeScopeGuard(Function&& function) {
  using Guard = ScopeGuard<Function>;
  return Guard(std::forward<Function>(function));
}
}

View file

@ -0,0 +1,58 @@
/*
* Copyright (c) 2016-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
*/
#pragma once
#include "utils/WorkQueue.h"
#include <cstddef>
#include <functional>
#include <thread>
#include <vector>
namespace pzstd {
/// A fixed-size pool of worker threads that run queued tasks in FIFO order.
class ThreadPool {
  std::vector<std::thread> threads_;
  WorkQueue<std::function<void()>> tasks_;

  /// Body of every worker thread: run tasks until `pop()` reports that the
  /// queue has been finished and drained.
  void runWorker() {
    std::function<void()> job;
    while (tasks_.pop(job)) {
      job();
    }
  }

 public:
  /// Constructs a thread pool with `numThreads` threads.
  explicit ThreadPool(std::size_t numThreads) {
    threads_.reserve(numThreads);
    for (std::size_t started = 0; started != numThreads; ++started) {
      threads_.emplace_back([this] { runWorker(); });
    }
  }

  /// Finishes all tasks currently in the queue, then joins every worker.
  ~ThreadPool() {
    tasks_.finish();
    for (auto worker = threads_.begin(); worker != threads_.end(); ++worker) {
      worker->join();
    }
  }

  /**
   * Adds `task` to the queue of tasks to execute. Since `task` is a
   * `std::function<>`, it cannot be a move only type. So any lambda passed must
   * not capture move only types (like `std::unique_ptr`).
   *
   * @param task  The task to execute.
   */
  void add(std::function<void()> task) {
    tasks_.push(std::move(task));
  }
};
}

View file

@ -0,0 +1,181 @@
/*
* Copyright (c) 2016-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
*/
#pragma once
#include "utils/Buffer.h"
#include <atomic>
#include <cassert>
#include <cstddef>
#include <condition_variable>
#include <cstddef>
#include <functional>
#include <mutex>
#include <queue>
namespace pzstd {
/// Thread-safe work queue. Unbounded by default; an optional maximum size
/// makes `push()` block while the queue is full.
template <typename T>
class WorkQueue {
// Protects all member variable access
std::mutex mutex_;
// Signalled when an item is pushed or `finish()` is called.
std::condition_variable readerCv_;
// Signalled when an item is popped, the max size changes, or `finish()` is
// called.
std::condition_variable writerCv_;
// Signalled when `finish()` is called.
std::condition_variable finishCv_;
std::queue<T> queue_;
// Set once by `finish()`; never reset.
bool done_;
// 0 means unbounded.
std::size_t maxSize_;
// Must have lock to call this function
bool full() const {
if (maxSize_ == 0) {
return false;
}
return queue_.size() >= maxSize_;
}
public:
/**
* Constructs an empty work queue with an optional max size.
* If `maxSize == 0` the queue size is unbounded.
*
* @param maxSize The maximum allowed size of the work queue.
*/
WorkQueue(std::size_t maxSize = 0) : done_(false), maxSize_(maxSize) {}
/**
* Push an item onto the work queue. Notify a single thread that work is
* available. If `finish()` has been called, do nothing and return false.
* If `push()` returns false, then `item` has not been moved from.
* Blocks while the queue is full and `finish()` has not been called.
*
* @param item Item to push onto the queue.
* @returns True upon success, false if `finish()` has been called. An
* item was pushed iff `push()` returns true.
*/
bool push(T&& item) {
{
std::unique_lock<std::mutex> lock(mutex_);
while (full() && !done_) {
writerCv_.wait(lock);
}
if (done_) {
return false;
}
queue_.push(std::move(item));
}
// The lock has been released before notifying.
readerCv_.notify_one();
return true;
}
/**
* Attempts to pop an item off the work queue. It will block until data is
* available or `finish()` has been called.
*
* @param[out] item If `pop` returns `true`, it contains the popped item.
* If `pop` returns `false`, it is unmodified.
* @returns True upon success. False if the queue is empty and
* `finish()` has been called.
*/
bool pop(T& item) {
{
std::unique_lock<std::mutex> lock(mutex_);
while (queue_.empty() && !done_) {
readerCv_.wait(lock);
}
// Items still queued when `finish()` was called are handed out first.
if (queue_.empty()) {
assert(done_);
return false;
}
item = std::move(queue_.front());
queue_.pop();
}
// The lock has been released before notifying.
writerCv_.notify_one();
return true;
}
/**
* Sets the maximum queue size. If `maxSize == 0` then it is unbounded.
* Wakes all blocked writers so that they re-check the new limit.
*
* @param maxSize The new maximum queue size.
*/
void setMaxSize(std::size_t maxSize) {
{
std::lock_guard<std::mutex> lock(mutex_);
maxSize_ = maxSize;
}
writerCv_.notify_all();
}
/**
* Promise that `push()` won't be called again, so once the queue is empty
* there will never be any more work. Must be called at most once (checked
* with an assert).
*/
void finish() {
{
std::lock_guard<std::mutex> lock(mutex_);
assert(!done_);
done_ = true;
}
readerCv_.notify_all();
writerCv_.notify_all();
finishCv_.notify_all();
}
/// Blocks until `finish()` has been called (but the queue may not be empty).
void waitUntilFinished() {
std::unique_lock<std::mutex> lock(mutex_);
while (!done_) {
finishCv_.wait(lock);
}
}
};
/// Work queue for `Buffer`s that knows the total number of bytes in the queue.
class BufferWorkQueue {
  WorkQueue<Buffer> queue_;
  // Sum of `size()` over all buffers currently queued.
  std::atomic<std::size_t> size_;

 public:
  /// Constructs an empty queue; `maxSize == 0` means unbounded.
  BufferWorkQueue(std::size_t maxSize = 0) : queue_(maxSize), size_(0) {}

  /// Queues `buffer`. If `finish()` has already been called the buffer is
  /// dropped and the byte count is left unchanged.
  void push(Buffer buffer) {
    // Read the size first: `buffer` is moved from by a successful push.
    const std::size_t bytes = buffer.size();
    // Count before queueing so that a concurrent `pop()` can never subtract
    // bytes that have not been added yet.
    size_.fetch_add(bytes);
    if (!queue_.push(std::move(buffer))) {
      // The queue refused the buffer, so its bytes must not stay counted.
      size_.fetch_sub(bytes);
    }
  }

  /// Pops the next buffer into `buffer`; see `WorkQueue::pop()`.
  bool pop(Buffer& buffer) {
    bool result = queue_.pop(buffer);
    if (result) {
      size_.fetch_sub(buffer.size());
    }
    return result;
  }

  /// Sets the maximum number of queued buffers (0 means unbounded).
  void setMaxSize(std::size_t maxSize) {
    queue_.setMaxSize(maxSize);
  }

  /// Promises that `push()` will not be called again.
  void finish() {
    queue_.finish();
  }

  /**
   * Blocks until `finish()` has been called.
   *
   * @returns The total number of bytes of all the `Buffer`s currently in the
   *          queue.
   */
  std::size_t size() {
    queue_.waitUntilFinished();
    return size_.load();
  }
};
}

View file

@ -0,0 +1,35 @@
cxx_test(
name='buffer_test',
srcs=['BufferTest.cpp'],
deps=['//contrib/pzstd/utils:buffer'],
)
cxx_test(
name='range_test',
srcs=['RangeTest.cpp'],
deps=['//contrib/pzstd/utils:range'],
)
cxx_test(
name='resource_pool_test',
srcs=['ResourcePoolTest.cpp'],
deps=['//contrib/pzstd/utils:resource_pool'],
)
cxx_test(
name='scope_guard_test',
srcs=['ScopeGuardTest.cpp'],
deps=['//contrib/pzstd/utils:scope_guard'],
)
cxx_test(
name='thread_pool_test',
srcs=['ThreadPoolTest.cpp'],
deps=['//contrib/pzstd/utils:thread_pool'],
)
# Every other rule in this file compiles the test source named after the
# utility it tests; this rule previously compiled RangeTest.cpp a second time,
# so the work queue tests were never built.
cxx_test(
    name='work_queue_test',
    srcs=['WorkQueueTest.cpp'],
    deps=['//contrib/pzstd/utils:work_queue'],
)

View file

@ -0,0 +1,89 @@
/*
* Copyright (c) 2016-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
*/
#include "utils/Buffer.h"
#include "utils/Range.h"
#include <gtest/gtest.h>
#include <memory>
using namespace pzstd;
namespace {
// Deleter for the `std::shared_ptr<unsigned char>` used in the tests below,
// whose storage is allocated with `new[]`.
void deleter(const unsigned char* buf) {
delete[] buf;
}
}
// Checks the size reported by a default-constructed, a sized, a
// move-constructed and a move-assigned `Buffer`.
TEST(Buffer, Constructors) {
  Buffer empty;
  EXPECT_TRUE(empty.empty());
  EXPECT_EQ(0, empty.size());

  Buffer sized(5);
  EXPECT_FALSE(sized.empty());
  EXPECT_EQ(5, sized.size());

  // Inspect the destination of each move, not the moved-from `sized`.
  Buffer moved(std::move(sized));
  EXPECT_FALSE(moved.empty());
  EXPECT_EQ(5, moved.size());

  Buffer assigned;
  assigned = std::move(moved);
  EXPECT_FALSE(assigned.empty());
  EXPECT_EQ(5, assigned.size());
}
// Tracks the reference count of the shared storage while `Buffer`s that
// refer to it are created, moved, split and destroyed.
TEST(Buffer, BufferManagement) {
  std::shared_ptr<unsigned char> storage(new unsigned char[10], deleter);
  {
    // One reference held by `storage`, one by the new Buffer.
    Buffer first(storage, MutableByteRange(storage.get(), storage.get() + 10));
    EXPECT_EQ(2, storage.use_count());

    // Moving transfers the reference instead of adding one.
    Buffer second(std::move(first));
    EXPECT_EQ(2, storage.use_count());

    Buffer third;
    third = std::move(second);
    EXPECT_EQ(2, storage.use_count());

    // Splitting creates a second Buffer over the same storage.
    Buffer head = third.splitAt(5);
    EXPECT_EQ(3, storage.use_count());

    // Shrinking a range does not touch the reference count.
    head.advance(1);
    third.subtract(1);
    EXPECT_EQ(3, storage.use_count());
  }
  // Every Buffer is gone; only `storage` itself is left.
  EXPECT_EQ(1, storage.use_count());
}
// Fills a 10-byte `Buffer` with the values 0..9 and checks which bytes remain
// visible after `splitAt()`, `advance()` and `subtract()`.
TEST(Buffer, Modifiers) {
Buffer buf(10);
{
unsigned char i = 0;
for (auto& byte : buf.range()) {
byte = i++;
}
}
// prefix = bytes {0, 1}; buf = bytes {2..9}.
auto prefix = buf.splitAt(2);
ASSERT_EQ(2, prefix.size());
EXPECT_EQ(0, *prefix.data());
ASSERT_EQ(8, buf.size());
EXPECT_EQ(2, *buf.data());
// buf = bytes {4..9}.
buf.advance(2);
EXPECT_EQ(4, *buf.data());
EXPECT_EQ(9, *(buf.range().end() - 1));
// buf = bytes {4..7}.
buf.subtract(2);
EXPECT_EQ(7, *(buf.range().end() - 1));
EXPECT_EQ(4, buf.size());
}

View file

@ -0,0 +1,82 @@
/*
* Copyright (c) 2016-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
*/
#include "utils/Range.h"
#include <gtest/gtest.h>
#include <string>
using namespace pzstd;
// Range is directly copied from folly.
// Just some sanity tests to make sure everything seems to work.
// Builds a range over the string "hello" through each constructor and checks
// its size and its first and last character.
TEST(Range, Constructors) {
// Default constructor: empty range.
StringPiece empty;
EXPECT_TRUE(empty.empty());
EXPECT_EQ(0, empty.size());
std::string str = "hello";
{
// (begin, end) constructor with std::string iterators.
Range<std::string::const_iterator> piece(str.begin(), str.end());
EXPECT_EQ(5, piece.size());
EXPECT_EQ('h', *piece.data());
EXPECT_EQ('o', *(piece.end() - 1));
}
{
// (pointer, size) constructor.
StringPiece piece(str.data(), str.size());
EXPECT_EQ(5, piece.size());
EXPECT_EQ('h', *piece.data());
EXPECT_EQ('o', *(piece.end() - 1));
}
{
// Construction from a std::string.
StringPiece piece(str);
EXPECT_EQ(5, piece.size());
EXPECT_EQ('h', *piece.data());
EXPECT_EQ('o', *(piece.end() - 1));
}
{
// Construction from a NUL-terminated C string.
StringPiece piece(str.c_str());
EXPECT_EQ(5, piece.size());
EXPECT_EQ('h', *piece.data());
EXPECT_EQ('o', *(piece.end() - 1));
}
}
// Checks `subpiece()`, `subtract()` and `advance()` on "hello world", and
// that working on copies leaves the original range unchanged.
TEST(Range, Modifiers) {
StringPiece range("hello world");
ASSERT_EQ(11, range.size());
{
// First five characters via subpiece().
auto hello = range.subpiece(0, 5);
EXPECT_EQ(5, hello.size());
EXPECT_EQ('h', *hello.data());
EXPECT_EQ('o', *(hello.end() - 1));
}
{
// Dropping the last six characters leaves "hello".
auto hello = range;
hello.subtract(6);
EXPECT_EQ(5, hello.size());
EXPECT_EQ('h', *hello.data());
EXPECT_EQ('o', *(hello.end() - 1));
}
{
// Dropping the first six characters leaves "world".
auto world = range;
world.advance(6);
EXPECT_EQ(5, world.size());
EXPECT_EQ('w', *world.data());
EXPECT_EQ('d', *(world.end() - 1));
}
// The original range still covers the whole string.
std::string expected = "hello world";
EXPECT_EQ(expected, std::string(range.begin(), range.end()));
EXPECT_EQ(expected, std::string(range.data(), range.size()));
}

View file

@ -0,0 +1,72 @@
/*
* Copyright (c) 2016-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
*/
#include "utils/ResourcePool.h"
#include <gtest/gtest.h>
#include <atomic>
#include <thread>
using namespace pzstd;
// Single-threaded walk through the pool: resources handed back are reused
// (their modified values are observed again), and every created resource is
// deleted once the pool goes out of scope.
TEST(ResourcePool, FullTest) {
  unsigned numCreated = 0;
  unsigned numDeleted = 0;
  {
    auto factory = [&numCreated] {
      ++numCreated;
      return new int{5};
    };
    auto deleter = [&numDeleted](int *x) {
      ++numDeleted;
      delete x;
    };
    ResourcePool<int> pool(factory, deleter);

    {  // First request creates a fresh resource holding 5.
      auto i = pool.get();
      EXPECT_EQ(5, *i);
      *i = 6;
    }
    {  // The modified resource comes back first; a second one is created.
      auto i = pool.get();
      EXPECT_EQ(6, *i);
      auto j = pool.get();
      EXPECT_EQ(5, *j);
      *j = 7;
    }
    {  // Both previously modified resources are handed out again.
      auto i = pool.get();
      EXPECT_EQ(6, *i);
      auto j = pool.get();
      EXPECT_EQ(7, *j);
    }
  }
  EXPECT_EQ(2, numCreated);
  EXPECT_EQ(numCreated, numDeleted);
}
// Two threads each take a resource from the pool 100 times and increment it.
// Afterwards the two resources together must account for all 200 increments.
TEST(ResourcePool, ThreadSafe) {
  std::atomic<unsigned> numCreated{0};
  std::atomic<unsigned> numDeleted{0};
  {
    auto factory = [&numCreated] {
      ++numCreated;
      return new int{0};
    };
    auto deleter = [&numDeleted](int *x) {
      ++numDeleted;
      delete x;
    };
    ResourcePool<int> pool(factory, deleter);

    auto push = [&pool] {
      int remaining = 100;
      while (remaining-- > 0) {
        auto x = pool.get();
        ++*x;
      }
    };

    std::thread t1{push};
    std::thread t2{push};
    t1.join();
    t2.join();

    auto x = pool.get();
    auto y = pool.get();
    EXPECT_EQ(200, *x + *y);
  }
  EXPECT_GE(2, numCreated);
  EXPECT_EQ(numCreated, numDeleted);
}

View file

@ -0,0 +1,28 @@
/*
* Copyright (c) 2016-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
*/
#include "utils/ScopeGuard.h"
#include <gtest/gtest.h>
using namespace pzstd;
// A dismissed guard must not run its callback when it is destroyed.
TEST(ScopeGuard, Dismiss) {
  // The guard is destroyed when the test body ends; its callback would
  // record a failure if it ran.
  auto guard = makeScopeGuard([&] { EXPECT_TRUE(false); });
  guard.dismiss();
}
// A guard that is not dismissed runs its callback on scope exit.
TEST(ScopeGuard, Executes) {
  bool executed = false;
  {
    auto markExecuted = [&] { executed = true; };
    auto guard = makeScopeGuard(markExecuted);
  }  // guard destroyed here
  EXPECT_TRUE(executed);
}

View file

@ -0,0 +1,71 @@
/*
* Copyright (c) 2016-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
*/
#include "utils/ThreadPool.h"
#include <gtest/gtest.h>
#include <atomic>
#include <iostream>
#include <thread>
#include <vector>
using namespace pzstd;
// With a single worker thread, jobs must run in the order they were added.
TEST(ThreadPool, Ordering) {
  std::vector<int> results;

  {
    ThreadPool executor(1);
    for (int i = 0; i < 10; ++i) {
      executor.add([ &results, i ] { results.push_back(i); });
    }
  }  // leaving the scope destroys the executor before results is inspected

  // Stop here if any job was dropped: indexing results[i] below would
  // otherwise read past the end of the vector instead of failing cleanly.
  ASSERT_EQ(10u, results.size());
  for (int i = 0; i < 10; ++i) {
    EXPECT_EQ(i, results[i]);
  }
}
// Queues 10 jobs on a 5-thread pool. Each job spins until `start` is set and
// then bumps the counter; all 10 must have run once the pool is destroyed.
TEST(ThreadPool, AllJobsFinished) {
  std::atomic<unsigned> numFinished{0};
  std::atomic<bool> start{false};
  {
    std::cerr << "Creating executor" << std::endl;
    ThreadPool executor(5);

    auto job = [ &numFinished, &start ] {
      // Hold every job back until the test releases them together.
      while (!start.load()) {
        std::this_thread::yield();
      }
      ++numFinished;
    };
    for (int queued = 0; queued < 10; ++queued) {
      executor.add(job);
    }

    std::cerr << "Starting" << std::endl;
    start.store(true);
    std::cerr << "Finishing" << std::endl;
  }
  EXPECT_EQ(10, numFinished.load());
}
// A job that is added while the pool is being torn down must never run.
TEST(ThreadPool, AddJobWhileJoining) {
  std::atomic<bool> done{false};
  {
    ThreadPool executor(1);
    executor.add([&executor, &done] {
      // Wait until the test thread is about to leave the scope.
      while (!done.load()) {
        std::this_thread::yield();
      }
      // Sleep for a second to be sure that we are joining
      std::this_thread::sleep_for(std::chrono::seconds(1));
      // This late job is expected to be rejected; running it is a failure.
      executor.add([] { EXPECT_TRUE(false); });
    });
    done.store(true);
  }
}

View file

@ -0,0 +1,282 @@
/*
* Copyright (c) 2016-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
*/
#include "utils/Buffer.h"
#include "utils/WorkQueue.h"
#include <gtest/gtest.h>
#include <iostream>
#include <memory>
#include <mutex>
#include <thread>
#include <vector>
using namespace pzstd;
namespace {
// Consumer functor shared by the multi-consumer tests: drains `queue` and
// records each popped value v at results[v], guarded by `mutex`.
struct Popper {
  WorkQueue<int>* queue;
  int* results;
  std::mutex* mutex;

  void operator()() {
    int value;
    // pop() returning false ends the loop.
    while (queue->pop(value)) {
      std::lock_guard<std::mutex> guard(*mutex);
      results[value] = value;
    }
  }
};
}  // namespace
// FIFO behaviour from a single thread, before and after finish().
TEST(WorkQueue, SingleThreaded) {
  WorkQueue<int> queue;
  int result;

  // One element in, one element out.
  queue.push(5);
  EXPECT_TRUE(queue.pop(result));
  EXPECT_EQ(5, result);

  // Two elements come out in insertion order.
  queue.push(1);
  queue.push(2);
  EXPECT_TRUE(queue.pop(result));
  EXPECT_EQ(1, result);
  EXPECT_TRUE(queue.pop(result));
  EXPECT_EQ(2, result);

  // Elements pushed before finish() can still be popped; then pop() fails.
  queue.push(1);
  queue.push(2);
  queue.finish();
  EXPECT_TRUE(queue.pop(result));
  EXPECT_EQ(1, result);
  EXPECT_TRUE(queue.pop(result));
  EXPECT_EQ(2, result);
  EXPECT_FALSE(queue.pop(result));

  queue.waitUntilFinished();
}
// Single producer, single consumer: the consumer must see 0..max-1 in order
// and then observe the end of the queue.
TEST(WorkQueue, SPSC) {
  WorkQueue<int> queue;
  const int max = 100;

  // Pre-load the first ten values before the consumer starts.
  for (int i = 0; i < 10; ++i) {
    queue.push(int{i});
  }

  std::thread thread([ &queue, max ] {
    int result;
    int i = 0;
    while (queue.pop(result)) {
      EXPECT_EQ(i, result);
      ++i;
    }
    // pop() failed: exactly `max` values must have been consumed.
    EXPECT_EQ(i, max);
  });

  std::this_thread::yield();
  for (int i = 10; i < max; ++i) {
    queue.push(int{i});
  }
  queue.finish();

  thread.join();
}
// Single producer, five consumers: every pushed value must be recorded.
TEST(WorkQueue, SPMC) {
  WorkQueue<int> queue;
  std::vector<int> results(50, -1);
  std::mutex mutex;

  std::vector<std::thread> threads;
  for (int consumer = 0; consumer < 5; ++consumer) {
    threads.emplace_back(Popper{&queue, results.data(), &mutex});
  }

  for (int value = 0; value < 50; ++value) {
    queue.push(int{value});
  }
  queue.finish();

  for (auto& thread : threads) {
    thread.join();
  }

  // Each slot still holding -1 would mean a value was never popped.
  for (int i = 0; i < 50; ++i) {
    EXPECT_EQ(i, results[i]);
  }
}
// Two producers (50 values each) and four consumers on an unbounded queue.
TEST(WorkQueue, MPMC) {
  WorkQueue<int> queue;
  std::vector<int> results(100, -1);
  std::mutex mutex;

  std::vector<std::thread> popperThreads;
  for (int consumer = 0; consumer < 4; ++consumer) {
    popperThreads.emplace_back(Popper{&queue, results.data(), &mutex});
  }

  std::vector<std::thread> pusherThreads;
  for (int producer = 0; producer < 2; ++producer) {
    // Each producer owns a disjoint half-open range [min, max).
    auto min = producer * 50;
    auto max = (producer + 1) * 50;
    pusherThreads.emplace_back([ &queue, min, max ] {
      for (int value = min; value < max; ++value) {
        queue.push(int{value});
      }
    });
  }

  // Only call finish() once every producer is done pushing.
  for (auto& thread : pusherThreads) {
    thread.join();
  }
  queue.finish();

  for (auto& thread : popperThreads) {
    thread.join();
  }

  for (int i = 0; i < 100; ++i) {
    EXPECT_EQ(i, results[i]);
  }
}
// A queue constructed with an argument of 1 handles repeated push/pop pairs
// from one thread.
TEST(WorkQueue, BoundedSizeWorks) {
  WorkQueue<int> queue(1);
  int result;

  // Two full push/pop round trips.
  for (int round = 0; round < 2; ++round) {
    queue.push(5);
    queue.pop(result);
  }

  // The value pushed before finish() is still retrievable afterwards.
  queue.push(5);
  queue.finish();
  queue.pop(result);
  EXPECT_EQ(5, result);
}
// A second push on a WorkQueue<int>(1) that already holds one item must not
// be delivered once finish() has been called: only the first value is popped.
TEST(WorkQueue, BoundedSizePushAfterFinish) {
  WorkQueue<int> queue(1);
  int result;
  queue.push(5);

  std::thread pusher([&queue] { queue.push(6); });

  // Dirtily try and make sure that pusher has run.
  std::this_thread::sleep_for(std::chrono::seconds(1));
  queue.finish();

  EXPECT_TRUE(queue.pop(result));
  EXPECT_EQ(5, result);
  EXPECT_FALSE(queue.pop(result));

  pusher.join();
}
// After setMaxSize(1) on a queue already holding two items, a third push
// must not be delivered: only the two original values are popped.
TEST(WorkQueue, SetMaxSize) {
  WorkQueue<int> queue(2);
  int result;
  queue.push(5);
  queue.push(6);
  queue.setMaxSize(1);

  std::thread pusher([&queue] { queue.push(7); });

  // Dirtily try and make sure that pusher has run.
  std::this_thread::sleep_for(std::chrono::seconds(1));
  queue.finish();

  EXPECT_TRUE(queue.pop(result));
  EXPECT_EQ(5, result);
  EXPECT_TRUE(queue.pop(result));
  EXPECT_EQ(6, result);
  EXPECT_FALSE(queue.pop(result));

  pusher.join();
}
// Two producers (100 values each) and four consumers on a WorkQueue<int>(10).
// Progress is logged to stderr at each phase.
TEST(WorkQueue, BoundedSizeMPMC) {
  WorkQueue<int> queue(10);
  std::vector<int> results(200, -1);
  std::mutex mutex;

  std::cerr << "Creating popperThreads" << std::endl;
  std::vector<std::thread> popperThreads;
  for (int consumer = 0; consumer < 4; ++consumer) {
    popperThreads.emplace_back(Popper{&queue, results.data(), &mutex});
  }

  std::cerr << "Creating pusherThreads" << std::endl;
  std::vector<std::thread> pusherThreads;
  for (int producer = 0; producer < 2; ++producer) {
    // Each producer owns a disjoint half-open range [min, max).
    auto min = producer * 100;
    auto max = (producer + 1) * 100;
    pusherThreads.emplace_back([ &queue, min, max ] {
      for (int value = min; value < max; ++value) {
        queue.push(int{value});
      }
    });
  }

  std::cerr << "Joining pusherThreads" << std::endl;
  for (auto& thread : pusherThreads) {
    thread.join();
  }

  std::cerr << "Finishing queue" << std::endl;
  queue.finish();

  std::cerr << "Joining popperThreads" << std::endl;
  for (auto& thread : popperThreads) {
    thread.join();
  }

  std::cerr << "Inspecting results" << std::endl;
  for (int i = 0; i < 200; ++i) {
    EXPECT_EQ(i, results[i]);
  }
}
// A successful push takes ownership of the moved-in value; a push rejected
// after finish() must leave the caller's value intact.
TEST(WorkQueue, FailedPush) {
  WorkQueue<std::unique_ptr<int>> queue;

  // Accepted push: x is emptied.
  std::unique_ptr<int> x(new int{5});
  EXPECT_TRUE(queue.push(std::move(x)));
  EXPECT_EQ(nullptr, x);

  queue.finish();

  // Rejected push: x still owns its int.
  x.reset(new int{6});
  EXPECT_FALSE(queue.push(std::move(x)));
  EXPECT_NE(nullptr, x);
  EXPECT_EQ(6, *x);
}
// BufferWorkQueue::size() is expected to equal the summed sizes of the
// buffers currently held by the queue.
TEST(BufferWorkQueue, SizeCalculatedCorrectly) {
  {  // Nothing queued.
    BufferWorkQueue queue;
    queue.finish();
    EXPECT_EQ(0, queue.size());
  }

  {  // One buffer.
    BufferWorkQueue queue;
    queue.push(Buffer(10));
    queue.finish();
    EXPECT_EQ(10, queue.size());
  }

  {  // Two buffers: sizes add up.
    BufferWorkQueue queue;
    queue.push(Buffer(10));
    queue.push(Buffer(5));
    queue.finish();
    EXPECT_EQ(15, queue.size());
  }

  {  // Popping the first buffer leaves only the second one counted.
    BufferWorkQueue queue;
    queue.push(Buffer(10));
    queue.push(Buffer(5));
    queue.finish();
    Buffer buffer;
    queue.pop(buffer);
    EXPECT_EQ(5, queue.size());
  }
}

View file

@ -0,0 +1,35 @@
# ################################################################
# Copyright (c) 2019-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
# LICENSE file in the root directory of this source tree) and the GPLv2 (found
# in the COPYING file in the root directory of this source tree).
# ################################################################
.PHONY: all
all: recover_directory

ZSTDLIBDIR ?= ../../lib
PROGRAMDIR ?= ../../programs

CFLAGS ?= -O3
CFLAGS += -I$(ZSTDLIBDIR) -I$(PROGRAMDIR)
CFLAGS += -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
          -Wstrict-aliasing=1 -Wswitch-enum \
          -Wstrict-prototypes -Wundef \
          -Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \
          -Wredundant-decls -Wmissing-prototypes
CFLAGS += $(DEBUGFLAGS) $(MOREFLAGS)
FLAGS = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS)

# The library target is phony so the sub-make is always consulted and decides
# for itself whether libzstd.a needs rebuilding.
.PHONY: $(ZSTDLIBDIR)/libzstd.a
$(ZSTDLIBDIR)/libzstd.a:
	$(MAKE) -C $(ZSTDLIBDIR) libzstd.a

# The binary is written to recover_directory$(EXT); EXT may be empty.
recover_directory: recover_directory.c $(ZSTDLIBDIR)/libzstd.a $(PROGRAMDIR)/util.c
	$(CC) $(FLAGS) $^ -o $@$(EXT)

# Remove the binary under both names so a non-empty EXT is cleaned up too.
.PHONY: clean
clean:
	rm -f recover_directory recover_directory$(EXT)

View file

@ -0,0 +1,152 @@
/*
* Copyright (c) 2016-2021, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#define ZSTD_STATIC_LINKING_ONLY
#include "util.h"
#include "zstd.h"
/*
 * CHECK(cond, fmt, ...) : abort the program when `cond` is false.
 * On failure, prints "<file>:<line> CHECK(<cond text>) failed: " followed by
 * the optional printf-style message and a newline to stderr, then exit(1).
 * The leading "" is concatenated with the format literal so the macro also
 * compiles when no message is given; the format must be a string literal.
 * `cond` is evaluated exactly once.
 */
#define CHECK(cond, ...) \
do { \
if (!(cond)) { \
fprintf(stderr, "%s:%d CHECK(%s) failed: ", __FILE__, __LINE__, #cond); \
fprintf(stderr, "" __VA_ARGS__); \
fprintf(stderr, "\n"); \
exit(1); \
} \
} while (0)
/* Print the command-line help for `program` to stderr. */
static void usage(char const *program) {
    fprintf(stderr,
            "USAGE: %s FILE.zst PREFIX\n"
            "FILE.zst: A zstd compressed file with multiple frames\n"
            "PREFIX: The output prefix. Uncompressed files will be "
            "created named ${PREFIX}0 ${PREFIX}1...\n\n"
            "This program takes concatenated zstd frames and "
            "decompresses them into individual files.\n"
            "E.g. files created with a command like: zstd -r directory "
            "-o file.zst\n",
            program);
}
/* An input file loaded into memory plus summary data about the zstd frames
 * found in it; filled in by readFile(). */
typedef struct {
char *data;          /* heap buffer holding the whole file; released by main() */
size_t size;         /* number of bytes in `data` */
size_t frames;       /* number of zstd frames counted in the file */
size_t maxFrameSize; /* largest compressed frame size encountered, in bytes */
} ZstdFrames;
/*
 * Load `fileName` fully into memory and walk it frame by frame.
 * Returns the buffer together with the frame count and the largest
 * compressed frame size. Any failure (unknown size, allocation, I/O,
 * malformed frame) terminates the program through CHECK.
 */
static ZstdFrames readFile(char const *fileName) {
    U64 const fileSize = UTIL_getFileSize(fileName);
    CHECK(fileSize != UTIL_FILESIZE_UNKNOWN, "Unknown file size!");

    char *const data = (char *)malloc(fileSize);
    CHECK(data != NULL, "Allocation failed");

    FILE *file = fopen(fileName, "rb");
    CHECK(file != NULL, "fopen failed");
    size_t const readSize = fread(data, 1, fileSize, file);
    CHECK(readSize == fileSize, "fread failed");
    fclose(file);

    /* Count the frames and remember the largest compressed one. */
    size_t nbFrames = 0;
    size_t maxFrameSize = 0;
    size_t index = 0;
    while (index < fileSize) {
        size_t const frameSize =
            ZSTD_findFrameCompressedSize(data + index, fileSize - index);
        CHECK(!ZSTD_isError(frameSize), "Bad zstd frame: %s",
              ZSTD_getErrorName(frameSize));
        maxFrameSize = (frameSize > maxFrameSize) ? frameSize : maxFrameSize;
        nbFrames += 1;
        index += frameSize;
    }
    CHECK(index == fileSize, "Zstd file corrupt!");

    ZstdFrames frames;
    frames.data = data;
    frames.size = fileSize;
    frames.frames = nbFrames;
    frames.maxFrameSize = maxFrameSize;
    return frames;
}
/* Number of decimal digits needed to print `numFrames` after narrowing it to
 * unsigned (the width "%u" would produce). Zero needs one digit. */
static int computePadding(size_t numFrames) {
    unsigned remaining = (unsigned)numFrames;
    int digits = 1;
    while (remaining >= 10) {
        remaining /= 10;
        ++digits;
    }
    return digits;
}
/*
 * Usage: recover_directory FILE.zst PREFIX
 *
 * Reads FILE.zst, which must contain more than one zstd frame, and
 * decompresses each frame into its own file named PREFIX followed by the
 * zero-padded frame index. Progress is reported on stderr. Returns 0 on
 * success; every failure exits with status 1.
 */
int main(int argc, char **argv) {
    if (argc != 3) {
        usage(argv[0]);
        exit(1);
    }
    char const *const zstdFile = argv[1];
    char const *const prefix = argv[2];

    ZstdFrames frames = readFile(zstdFile);
    if (frames.frames <= 1) {
        fprintf(
            stderr,
            "%s only has %u zstd frame. Simply use `zstd -d` to decompress it.\n",
            zstdFile, (unsigned)frames.frames);
        exit(1);
    }

    /* Pad every index to the width of the largest one so names sort. */
    int const padding = computePadding(frames.frames - 1);
    size_t const outFileNameSize = strlen(prefix) + padding + 1;
    char* outFileName = malloc(outFileNameSize);
    CHECK(outFileName != NULL, "Allocation failure");

    size_t const bufferSize = 128 * 1024;
    void *buffer = malloc(bufferSize);
    CHECK(buffer != NULL, "Allocation failure");

    ZSTD_DCtx* dctx = ZSTD_createDCtx();
    CHECK(dctx != NULL, "Allocation failure");

    fprintf(stderr, "Recovering %u files...\n", (unsigned)frames.frames);

    size_t index;
    size_t frame = 0;
    for (index = 0; index < frames.size; ++frame) {
        size_t const frameSize =
            ZSTD_findFrameCompressedSize(frames.data + index, frames.size - index);

        int const ret = snprintf(outFileName, outFileNameSize, "%s%0*u", prefix, padding, (unsigned)frame);
        /* snprintf returns the length it wanted to write, excluding the NUL;
         * a value equal to the buffer size therefore already means truncation. */
        CHECK(ret >= 0 && (size_t)ret < outFileNameSize, "snprintf failed!");

        FILE* outFile = fopen(outFileName, "wb");
        CHECK(outFile != NULL, "fopen failed");

        ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only);
        ZSTD_inBuffer in = {frames.data + index, frameSize, 0};
        while (in.pos < in.size) {
            ZSTD_outBuffer out = {buffer, bufferSize, 0};
            CHECK(!ZSTD_isError(ZSTD_decompressStream(dctx, &out, &in)), "decompression failed");
            size_t const writeSize = fwrite(out.dst, 1, out.pos, outFile);
            CHECK(writeSize == out.pos, "fwrite failed");
        }
        /* fclose flushes buffered output, so a failure here means lost data. */
        CHECK(fclose(outFile) == 0, "fclose failed");

        fprintf(stderr, "Recovered %s\n", outFileName);
        index += frameSize;
    }
    fprintf(stderr, "Complete\n");

    free(outFileName);
    ZSTD_freeDCtx(dctx);
    free(buffer);
    free(frames.data);
    return 0;
}

View file

@ -0,0 +1,5 @@
seekable_compression
seekable_decompression
seekable_decompression_mem
parallel_processing
parallel_compression

View file

@ -0,0 +1,53 @@
# ################################################################
# Copyright (c) 2017-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
# LICENSE file in the root directory of this source tree) and the GPLv2 (found
# in the COPYING file in the root directory of this source tree).
# ################################################################
# This Makefile presumes libzstd is built, using `make` in / or /lib/
ZSTDLIB_PATH = ../../../lib
ZSTDLIB_NAME = libzstd.a
ZSTDLIB = $(ZSTDLIB_PATH)/$(ZSTDLIB_NAME)

CPPFLAGS += -DXXH_NAMESPACE=ZSTD_ -I../ -I../../../lib -I../../../lib/common

CFLAGS ?= -O3
CFLAGS += -g

# Sources of the seekable format plus the static zstd library, linked into
# every example.
SEEKABLE_OBJS = ../zstdseek_compress.c ../zstdseek_decompress.c $(ZSTDLIB)

.PHONY: default all clean test

default: all

all: seekable_compression seekable_decompression seekable_decompression_mem \
	parallel_processing

# Use $(MAKE) rather than a literal `make` so command-line flags and the
# jobserver are passed on to the sub-make.
$(ZSTDLIB):
	$(MAKE) -C $(ZSTDLIB_PATH) $(ZSTDLIB_NAME)

seekable_compression : seekable_compression.c $(SEEKABLE_OBJS)
	$(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@

seekable_decompression : seekable_decompression.c $(SEEKABLE_OBJS)
	$(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@

seekable_decompression_mem : seekable_decompression_mem.c $(SEEKABLE_OBJS)
	$(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@

parallel_processing : parallel_processing.c $(SEEKABLE_OBJS)
	$(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@ -pthread

parallel_compression : parallel_compression.c $(SEEKABLE_OBJS)
	$(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@ -pthread

clean:
	@rm -f core *.o tmp* result* *.zst \
		seekable_compression seekable_decompression \
		seekable_decompression_mem \
		parallel_processing parallel_compression
	@echo Cleaning completed

View file

@ -0,0 +1,214 @@
/*
* Copyright (c) 2017-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
*/
#include <stdlib.h> // malloc, free, exit, atoi
#include <stdio.h> // fprintf, perror, feof, fopen, etc.
#include <string.h> // strlen, memset, strcat
#define ZSTD_STATIC_LINKING_ONLY
#include <zstd.h> // presumes zstd library is installed
#include <zstd_errors.h>
/* SLEEP(x) : suspend the calling thread for x milliseconds. */
#if defined(WIN32) || defined(_WIN32)
# include <windows.h>
# define SLEEP(x) Sleep(x)
#else
# include <unistd.h>
/* Parenthesize the argument so an expression such as SLEEP(a + b) is scaled
 * as a whole instead of expanding to usleep(a + b * 1000). */
# define SLEEP(x) usleep((x) * 1000)
#endif
#include "xxhash.h"
#include "pool.h" // use zstd thread pool for demo
#include "zstd_seekable.h"
/* Allocate `size` bytes. On failure, report through perror and exit(1). */
static void* malloc_orDie(size_t size)
{
    void* const buff = malloc(size);
    if (buff) return buff;
    /* error */
    /* perror adds ": <reason>" itself, so the prefix carries no colon. */
    perror("malloc");
    exit(1);
}
/* Open `filename` with mode `instruction`. On failure, report the file name
 * through perror and exit(3). */
static FILE* fopen_orDie(const char *filename, const char *instruction)
{
    FILE* const stream = fopen(filename, instruction);
    if (stream == NULL) {
        perror(filename);
        exit(3);
    }
    return stream;
}
/* Read up to `sizeToRead` bytes from `file` into `buffer` and return the
 * number read. A short read is accepted only at end of file; any other
 * short read is reported through perror and ends the program with exit(4). */
static size_t fread_orDie(void* buffer, size_t sizeToRead, FILE* file)
{
    size_t const nbRead = fread(buffer, 1, sizeToRead, file);
    if (nbRead != sizeToRead && !feof(file)) {
        perror("fread");
        exit(4);
    }
    return nbRead;
}
/* Write exactly `sizeToWrite` bytes from `buffer` to `file` and return that
 * count. A short write is reported through perror and ends the program with
 * exit(5). */
static size_t fwrite_orDie(const void* buffer, size_t sizeToWrite, FILE* file)
{
    if (fwrite(buffer, 1, sizeToWrite, file) != sizeToWrite) {
        perror("fwrite");
        exit(5);
    }
    return sizeToWrite;
}
/* Close `file` and return 0. A failing fclose is reported through perror and
 * ends the program with exit(6). */
static size_t fclose_orDie(FILE* file)
{
    if (fclose(file) != 0) {
        perror("fclose");
        exit(6);
    }
    return 0;
}
/* Reposition `file` with fseek and then flush it. If either call fails,
 * report through perror and exit(7).
 * NOTE(review): this helper is also called on the "rb" input stream; ISO C
 * leaves fflush on an input-only stream undefined (POSIX defines it) —
 * confirm the flush is wanted there. */
static void fseek_orDie(FILE* file, long int offset, int origin)
{
    if (fseek(file, offset, origin) == 0 && fflush(file) == 0) {
        return;
    }
    perror("fseek");
    exit(7);
}
/* Return the current position of `file`. If ftell reports -1, print the
 * reason through perror and exit(8). */
static long int ftell_orDie(FILE* file)
{
    long int const position = ftell(file);
    if (position == -1) {
        perror("ftell");
        exit(8);
    }
    return position;
}
/* One frame's worth of compression work, handed to compressFrame() through
 * the thread pool and collected by compressFile_orDie(). */
struct job {
const void* src;      /* input chunk read from the source file */
size_t srcSize;       /* number of valid bytes in `src` */
void* dst;            /* output buffer for the compressed frame */
size_t dstSize;       /* in: capacity of `dst`; out: compressed size written */
unsigned checksum;    /* XXH64 of `src`, narrowed to unsigned on assignment */
int compressionLevel; /* level passed to ZSTD_compress */
int done;             /* set to 1 by compressFrame(); polled by the submitter.
                       * NOTE(review): plain int shared between threads with no
                       * synchronization visible here — confirm this is acceptable */
};
static void compressFrame(void* opaque)
{
struct job* job = opaque;
job->checksum = XXH64(job->src, job->srcSize, 0);
size_t ret = ZSTD_compress(job->dst, job->dstSize, job->src, job->srcSize, job->compressionLevel);
if (ZSTD_isError(ret)) {
fprintf(stderr, "ZSTD_compress() error : %s \n", ZSTD_getErrorName(ret));
exit(20);
}
job->dstSize = ret;
job->done = 1;
}
/*
 * Compress `fname` into `outName` as a sequence of independent zstd frames
 * followed by a seek table.
 *
 * The input is cut into chunks of `frameSize` bytes; each chunk is compressed
 * at level `cLevel` by a pool of `nbThreads` workers, and the frames are
 * written in input order. Any failure prints a message and exits.
 */
static void compressFile_orDie(const char* fname, const char* outName, int cLevel, unsigned frameSize, int nbThreads)
{
    /* A zero frame size would divide by zero when counting frames below. */
    if (frameSize == 0) { fprintf(stderr, "Frame size must be greater than 0 \n"); exit(10); }

    POOL_ctx* pool = POOL_create(nbThreads, nbThreads);
    if (pool == NULL) { fprintf(stderr, "POOL_create() error \n"); exit(9); }

    FILE* const fin = fopen_orDie(fname, "rb");
    FILE* const fout = fopen_orDie(outName, "wb");

    if (ZSTD_compressBound(frameSize) > 0xFFFFFFFFU) { fprintf(stderr, "Frame size too large \n"); exit(10); }
    unsigned dstSize = ZSTD_compressBound(frameSize);

    /* Measure the input to know how many frames are needed. */
    fseek_orDie(fin, 0, SEEK_END);
    long int length = ftell_orDie(fin);
    fseek_orDie(fin, 0, SEEK_SET);

    size_t numFrames = (length + frameSize - 1) / frameSize;

    struct job* jobs = malloc_orDie(sizeof(struct job) * numFrames);

    /* Read every chunk and queue one compression job per chunk. */
    size_t i;
    for(i = 0; i < numFrames; i++) {
        void* in = malloc_orDie(frameSize);
        void* out = malloc_orDie(dstSize);

        size_t inSize = fread_orDie(in, frameSize, fin);

        jobs[i].src = in;
        jobs[i].srcSize = inSize;
        jobs[i].dst = out;
        jobs[i].dstSize = dstSize;
        jobs[i].compressionLevel = cLevel;
        jobs[i].done = 0;
        POOL_add(pool, compressFrame, &jobs[i]);
    }

    ZSTD_frameLog* fl = ZSTD_seekable_createFrameLog(1);
    if (fl == NULL) { fprintf(stderr, "ZSTD_seekable_createFrameLog() failed \n"); exit(11); }

    /* Collect the frames in input order, logging each one for the seek table. */
    for (i = 0; i < numFrames; i++) {
        while (!jobs[i].done) SLEEP(5); /* wake up every 5 milliseconds to check */
        fwrite_orDie(jobs[i].dst, jobs[i].dstSize, fout);
        free((void*)jobs[i].src);
        free(jobs[i].dst);

        size_t ret = ZSTD_seekable_logFrame(fl, jobs[i].dstSize, jobs[i].srcSize, jobs[i].checksum);
        /* A frame missing from the log would yield a wrong seek table: stop. */
        if (ZSTD_isError(ret)) { fprintf(stderr, "ZSTD_seekable_logFrame() error : %s \n", ZSTD_getErrorName(ret)); exit(12); }
    }

    /* All jobs have completed; release the worker threads and the pool. */
    POOL_free(pool);

    /* Emit the seek table in 1024-byte pieces until nothing remains. */
    {   unsigned char seekTableBuff[1024];
        ZSTD_outBuffer out = {seekTableBuff, 1024, 0};
        for (;;) {
            size_t const remaining = ZSTD_seekable_writeSeekTable(fl, &out);
            /* An error code is non-zero too; without this check the loop
             * would never terminate. */
            if (ZSTD_isError(remaining)) { fprintf(stderr, "ZSTD_seekable_writeSeekTable() error : %s \n", ZSTD_getErrorName(remaining)); exit(13); }
            fwrite_orDie(seekTableBuff, out.pos, fout);
            out.pos = 0;
            if (remaining == 0) break;
        }
    }

    ZSTD_seekable_freeFrameLog(fl);
    free(jobs);
    fclose_orDie(fout);
    fclose_orDie(fin);
}
/* Return a newly allocated string holding `filename` with ".zst" appended.
 * The caller owns the returned buffer. */
static const char* createOutFilename_orDie(const char* filename)
{
    static const char suffix[] = ".zst";   /* sizeof(suffix) == 5, NUL included */
    size_t const nameLen = strlen(filename);
    char* const outName = malloc_orDie(nameLen + 5);

    memcpy(outName, filename, nameLen);
    memcpy(outName + nameLen, suffix, sizeof(suffix));
    return outName;
}
/*
 * Usage: <exe> FILE FRAME_SIZE NB_THREADS
 * Compresses FILE into FILE.zst at level 5 using NB_THREADS workers and
 * frames of FRAME_SIZE bytes. Returns 1 on wrong argument count, else 0.
 */
int main(int argc, const char** argv) {
    const char* const exeName = argv[0];
    if (argc!=4) {
        printf("wrong arguments\n");
        printf("usage:\n");
        printf("%s FILE FRAME_SIZE NB_THREADS\n", exeName);
        return 1;
    }

    {   const char* const inFileName = argv[1];
        unsigned const frameSize = (unsigned)atoi(argv[2]);
        int const nbThreads = atoi(argv[3]);

        const char* const outFileName = createOutFilename_orDie(inFileName);
        compressFile_orDie(inFileName, outFileName, 5, frameSize, nbThreads);
        /* The name was heap-allocated by createOutFilename_orDie. */
        free((void*)outFileName);
    }

    return 0;
}

View file

@ -0,0 +1,194 @@
/*
* Copyright (c) 2017-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
*/
/*
* A simple demo that sums up all the bytes in the file in parallel using
* seekable decompression and the zstd thread pool
*/
#include <stdlib.h> // malloc, exit
#include <stdio.h> // fprintf, perror, feof
#include <string.h> // strerror
#include <errno.h> // errno
#define ZSTD_STATIC_LINKING_ONLY
#include <zstd.h> // presumes zstd library is installed
#include <zstd_errors.h>
/* SLEEP(x) : suspend the calling thread for x milliseconds. */
#if defined(WIN32) || defined(_WIN32)
# include <windows.h>
# define SLEEP(x) Sleep(x)
#else
# include <unistd.h>
/* Parenthesize the argument so an expression such as SLEEP(a + b) is scaled
 * as a whole instead of expanding to usleep(a + b * 1000). */
# define SLEEP(x) usleep((x) * 1000)
#endif
#include "pool.h" // use zstd thread pool for demo
#include "zstd_seekable.h"
/* Smaller of a and b. The selected argument is evaluated twice, so do not
 * pass expressions with side effects. */
#define MIN(a, b) ((a) < (b) ? (a) : (b))
/* Allocate `size` bytes. On failure, report through perror and exit(1). */
static void* malloc_orDie(size_t size)
{
    void* const block = malloc(size);
    if (block == NULL) {
        perror("malloc");
        exit(1);
    }
    return block;
}
/* Resize the allocation at `ptr` to `size` bytes and return the new address.
 * If realloc returns NULL, report through perror and exit(1). */
static void* realloc_orDie(void* ptr, size_t size)
{
    void* const resized = realloc(ptr, size);
    if (resized == NULL) {
        perror("realloc");
        exit(1);
    }
    return resized;
}
/* Open `filename` with mode `instruction`. On failure, report the file name
 * through perror and exit(3). */
static FILE* fopen_orDie(const char *filename, const char *instruction)
{
    FILE* const stream = fopen(filename, instruction);
    if (stream == NULL) {
        perror(filename);
        exit(3);
    }
    return stream;
}
/* Read up to `sizeToRead` bytes from `file` into `buffer` and return the
 * number read. A short read is accepted only at end of file; any other
 * short read is reported through perror and ends the program with exit(4). */
static size_t fread_orDie(void* buffer, size_t sizeToRead, FILE* file)
{
    size_t const nbRead = fread(buffer, 1, sizeToRead, file);
    if (nbRead != sizeToRead && !feof(file)) {
        perror("fread");
        exit(4);
    }
    return nbRead;
}
/* Write exactly `sizeToWrite` bytes from `buffer` to `file` and return that
 * count. A short write is reported through perror and ends the program with
 * exit(5). */
static size_t fwrite_orDie(const void* buffer, size_t sizeToWrite, FILE* file)
{
    if (fwrite(buffer, 1, sizeToWrite, file) != sizeToWrite) {
        perror("fwrite");
        exit(5);
    }
    return sizeToWrite;
}
/* Close `file` and return 0. A failing fclose is reported through perror and
 * ends the program with exit(6). */
static size_t fclose_orDie(FILE* file)
{
    if (fclose(file) != 0) {
        perror("fclose");
        exit(6);
    }
    return 0;
}
/* Reposition `file` with fseek and then flush it. If either call fails,
 * report through perror and exit(7). */
static void fseek_orDie(FILE* file, long int offset, int origin)
{
    if (fseek(file, offset, origin) == 0 && fflush(file) == 0) {
        return;
    }
    perror("fseek");
    exit(7);
}
/* One frame's worth of summing work, handed to sumFrame() through the thread
 * pool. sumFile_orDie() fills this with a positional initializer, so the
 * field order must not change. */
struct sum_job {
const char* fname;      /* path of the seekable zstd file; each job opens it itself */
unsigned long long sum; /* out: sum of all decompressed bytes of the frame */
unsigned frameNb;       /* index of the frame this job processes */
int done;               /* set to 1 by sumFrame() once `sum` is valid; polled by the submitter.
                         * NOTE(review): plain int shared between threads with no
                         * synchronization visible here — confirm this is acceptable */
};
static void sumFrame(void* opaque)
{
struct sum_job* job = (struct sum_job*)opaque;
job->done = 0;
FILE* const fin = fopen_orDie(job->fname, "rb");
ZSTD_seekable* const seekable = ZSTD_seekable_create();
if (seekable==NULL) { fprintf(stderr, "ZSTD_seekable_create() error \n"); exit(10); }
size_t const initResult = ZSTD_seekable_initFile(seekable, fin);
if (ZSTD_isError(initResult)) { fprintf(stderr, "ZSTD_seekable_init() error : %s \n", ZSTD_getErrorName(initResult)); exit(11); }
size_t const frameSize = ZSTD_seekable_getFrameDecompressedSize(seekable, job->frameNb);
unsigned char* data = malloc_orDie(frameSize);
size_t result = ZSTD_seekable_decompressFrame(seekable, data, frameSize, job->frameNb);
if (ZSTD_isError(result)) { fprintf(stderr, "ZSTD_seekable_decompressFrame() error : %s \n", ZSTD_getErrorName(result)); exit(12); }
unsigned long long sum = 0;
size_t i;
for (i = 0; i < frameSize; i++) {
sum += data[i];
}
job->sum = sum;
job->done = 1;
fclose(fin);
ZSTD_seekable_free(seekable);
free(data);
}
/*
 * Print the sum of all decompressed bytes of the seekable zstd file `fname`.
 * One job per frame is dispatched to a pool of `nbThreads` workers; the
 * per-frame sums are then added up in frame order. Any failure prints a
 * message and exits.
 */
static void sumFile_orDie(const char* fname, int nbThreads)
{
    POOL_ctx* pool = POOL_create(nbThreads, nbThreads);
    if (pool == NULL) { fprintf(stderr, "POOL_create() error \n"); exit(9); }

    FILE* const fin = fopen_orDie(fname, "rb");

    ZSTD_seekable* const seekable = ZSTD_seekable_create();
    if (seekable==NULL) { fprintf(stderr, "ZSTD_seekable_create() error \n"); exit(10); }

    size_t const initResult = ZSTD_seekable_initFile(seekable, fin);
    if (ZSTD_isError(initResult)) { fprintf(stderr, "ZSTD_seekable_init() error : %s \n", ZSTD_getErrorName(initResult)); exit(11); }

    unsigned const numFrames = ZSTD_seekable_getNumFrames(seekable);
    struct sum_job* jobs = (struct sum_job*)malloc(numFrames * sizeof(struct sum_job));
    /* malloc(0) may legitimately return NULL, so only a non-empty request
     * that fails is an error. */
    if (jobs == NULL && numFrames != 0) { perror("malloc"); exit(1); }

    /* Queue one summing job per frame. */
    unsigned fnb;
    for (fnb = 0; fnb < numFrames; fnb++) {
        jobs[fnb] = (struct sum_job){ fname, 0, fnb, 0 };
        POOL_add(pool, sumFrame, &jobs[fnb]);
    }

    /* Wait for each job in turn and accumulate its result. */
    unsigned long long total = 0;
    for (fnb = 0; fnb < numFrames; fnb++) {
        while (!jobs[fnb].done) SLEEP(5); /* wake up every 5 milliseconds to check */
        total += jobs[fnb].sum;
    }

    printf("Sum: %llu\n", total);

    POOL_free(pool);
    ZSTD_seekable_free(seekable);
    fclose(fin);
    free(jobs);
}
/*
 * Usage: <exe> FILE NB_THREADS
 * Sums the decompressed bytes of FILE using NB_THREADS workers.
 * Returns 1 on wrong argument count, else 0.
 */
int main(int argc, const char** argv)
{
    const char* const exeName = argv[0];

    if (argc == 3) {
        const char* const inFilename = argv[1];
        int const nbThreads = atoi(argv[2]);
        sumFile_orDie(inFilename, nbThreads);
        return 0;
    }

    fprintf(stderr, "wrong arguments\n");
    fprintf(stderr, "usage:\n");
    fprintf(stderr, "%s FILE NB_THREADS\n", exeName);
    return 1;
}

View file

@ -0,0 +1,133 @@
/*
* Copyright (c) 2017-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
*/
#include <stdlib.h> // malloc, free, exit, atoi
#include <stdio.h> // fprintf, perror, feof, fopen, etc.
#include <string.h> // strlen, memset, strcat
#define ZSTD_STATIC_LINKING_ONLY
#include <zstd.h> // presumes zstd library is installed
#include "zstd_seekable.h"
/* Allocate `size` bytes. On failure, report through perror and exit(1). */
static void* malloc_orDie(size_t size)
{
    void* const buff = malloc(size);
    if (buff) return buff;
    /* error */
    /* perror adds ": <reason>" itself, so the prefix carries no colon. */
    perror("malloc");
    exit(1);
}
/* Open `filename` with mode `instruction`. On failure, report the file name
 * through perror and exit(3). */
static FILE* fopen_orDie(const char *filename, const char *instruction)
{
    FILE* const stream = fopen(filename, instruction);
    if (stream == NULL) {
        perror(filename);
        exit(3);
    }
    return stream;
}
/* Read up to `sizeToRead` bytes from `file` into `buffer` and return the
 * number read. A short read is accepted only at end of file; any other
 * short read is reported through perror and ends the program with exit(4). */
static size_t fread_orDie(void* buffer, size_t sizeToRead, FILE* file)
{
    size_t const nbRead = fread(buffer, 1, sizeToRead, file);
    if (nbRead != sizeToRead && !feof(file)) {
        perror("fread");
        exit(4);
    }
    return nbRead;
}
/* Write exactly `sizeToWrite` bytes from `buffer` to `file` and return that
 * count. A short write is reported through perror and ends the program with
 * exit(5). */
static size_t fwrite_orDie(const void* buffer, size_t sizeToWrite, FILE* file)
{
    if (fwrite(buffer, 1, sizeToWrite, file) != sizeToWrite) {
        perror("fwrite");
        exit(5);
    }
    return sizeToWrite;
}
/* Close `file` and return 0. A failing fclose is reported through perror and
 * ends the program with exit(6). */
static size_t fclose_orDie(FILE* file)
{
    if (fclose(file) != 0) {
        perror("fclose");
        exit(6);
    }
    return 0;
}
static void compressFile_orDie(const char* fname, const char* outName, int cLevel, unsigned frameSize)
{
FILE* const fin = fopen_orDie(fname, "rb");
FILE* const fout = fopen_orDie(outName, "wb");
size_t const buffInSize = ZSTD_CStreamInSize(); /* can always read one full block */
void* const buffIn = malloc_orDie(buffInSize);
size_t const buffOutSize = ZSTD_CStreamOutSize(); /* can always flush a full block */
void* const buffOut = malloc_orDie(buffOutSize);
ZSTD_seekable_CStream* const cstream = ZSTD_seekable_createCStream();
if (cstream==NULL) { fprintf(stderr, "ZSTD_seekable_createCStream() error \n"); exit(10); }
size_t const initResult = ZSTD_seekable_initCStream(cstream, cLevel, 1, frameSize);
if (ZSTD_isError(initResult)) { fprintf(stderr, "ZSTD_seekable_initCStream() error : %s \n", ZSTD_getErrorName(initResult)); exit(11); }
size_t read, toRead = buffInSize;
while( (read = fread_orDie(buffIn, toRead, fin)) ) {
ZSTD_inBuffer input = { buffIn, read, 0 };
while (input.pos < input.size) {
ZSTD_outBuffer output = { buffOut, buffOutSize, 0 };
toRead = ZSTD_seekable_compressStream(cstream, &output , &input); /* toRead is guaranteed to be <= ZSTD_CStreamInSize() */
if (ZSTD_isError(toRead)) { fprintf(stderr, "ZSTD_seekable_compressStream() error : %s \n", ZSTD_getErrorName(toRead)); exit(12); }
if (toRead > buffInSize) toRead = buffInSize; /* Safely handle case when `buffInSize` is manually changed to a value < ZSTD_CStreamInSize()*/
fwrite_orDie(buffOut, output.pos, fout);
}
}
while (1) {
ZSTD_outBuffer output = { buffOut, buffOutSize, 0 };
size_t const remainingToFlush = ZSTD_seekable_endStream(cstream, &output); /* close stream */
if (ZSTD_isError(remainingToFlush)) { fprintf(stderr, "ZSTD_seekable_endStream() error : %s \n", ZSTD_getErrorName(remainingToFlush)); exit(13); }
fwrite_orDie(buffOut, output.pos, fout);
if (!remainingToFlush) break;
}
ZSTD_seekable_freeCStream(cstream);
fclose_orDie(fout);
fclose_orDie(fin);
free(buffIn);
free(buffOut);
}
/* Return a newly allocated string holding `filename` with ".zst" appended.
 * The caller owns the returned buffer and must free it. */
static char* createOutFilename_orDie(const char* filename)
{
    static const char suffix[] = ".zst";   /* sizeof(suffix) == 5, NUL included */
    size_t const nameLen = strlen(filename);
    char* const outName = malloc_orDie(nameLen + 5);

    memcpy(outName, filename, nameLen);
    memcpy(outName + nameLen, suffix, sizeof(suffix));
    return outName;
}
/*
 * Usage: <exe> FILE FRAME_SIZE
 * Compresses FILE into FILE.zst at level 5 in the seekable format.
 * Returns 1 on wrong argument count, else 0.
 */
int main(int argc, const char** argv) {
    const char* const exeName = argv[0];

    if (argc == 3) {
        const char* const inFileName = argv[1];
        unsigned const frameSize = (unsigned)atoi(argv[2]);
        char* const outFileName = createOutFilename_orDie(inFileName);

        compressFile_orDie(inFileName, outFileName, 5, frameSize);
        free(outFileName);
        return 0;
    }

    printf("wrong arguments\n");
    printf("usage:\n");
    printf("%s FILE FRAME_SIZE\n", exeName);
    return 1;
}

View file

@ -0,0 +1,141 @@
/*
* Copyright (c) 2017-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
*/
#include <stdlib.h> // malloc, exit
#include <stdio.h> // fprintf, perror, feof
#include <string.h> // strerror
#include <errno.h> // errno
#define ZSTD_STATIC_LINKING_ONLY
#include <zstd.h> // presumes zstd library is installed
#include <zstd_errors.h>
#include "zstd_seekable.h"
/* Smaller of a and b. The selected argument is evaluated twice, so do not
 * pass expressions with side effects. */
#define MIN(a, b) ((a) < (b) ? (a) : (b))
/* Allocate `size` bytes. On failure, report through perror and exit(1). */
static void* malloc_orDie(size_t size)
{
    void* const block = malloc(size);
    if (block == NULL) {
        perror("malloc");
        exit(1);
    }
    return block;
}
/* realloc() wrapper that never returns NULL.
 *
 * Resizes `ptr` to `size` bytes and returns the (possibly moved) block.
 * If realloc() yields NULL, reports the failure with perror("realloc") and
 * exits with status 1. */
static void* realloc_orDie(void* ptr, size_t size)
{
    void* const resized = realloc(ptr, size);
    if (resized == NULL) {
        perror("realloc");
        exit(1);
    }
    return resized;
}
/* fopen() wrapper that never returns NULL.
 *
 * Opens `filename` using the mode string `instruction` and returns the
 * stream. If fopen() fails, reports the failure with perror(filename) and
 * exits with status 3. */
static FILE* fopen_orDie(const char *filename, const char *instruction)
{
    FILE* const handle = fopen(filename, instruction);
    if (handle == NULL) {
        perror(filename);
        exit(3);
    }
    return handle;
}
/* fread() wrapper that exits on a read error.
 *
 * Reads up to `sizeToRead` bytes from `file` into `buffer` and returns the
 * number of bytes actually read. A short read is accepted only when the
 * stream has reached end of file; any other short read is reported with
 * perror("fread") and terminates the program with status 4. */
static size_t fread_orDie(void* buffer, size_t sizeToRead, FILE* file)
{
    size_t const got = fread(buffer, 1, sizeToRead, file);
    /* feof() is consulted only for short reads. */
    if (got != sizeToRead && !feof(file)) {
        perror("fread");
        exit(4);
    }
    return got;
}
/* fwrite() wrapper that exits on a short write.
 *
 * Writes `sizeToWrite` bytes from `buffer` to `file` and returns
 * `sizeToWrite`. If fewer bytes were written, reports the failure with
 * perror("fwrite") and exits with status 5. */
static size_t fwrite_orDie(const void* buffer, size_t sizeToWrite, FILE* file)
{
    size_t const put = fwrite(buffer, 1, sizeToWrite, file);
    if (put != sizeToWrite) {
        perror("fwrite");
        exit(5);
    }
    return sizeToWrite;
}
/* fclose() wrapper that exits on failure.
 *
 * Closes `file` and returns 0. If fclose() reports an error, prints it with
 * perror("fclose") and exits with status 6. */
static size_t fclose_orDie(FILE* file)
{
    if (fclose(file) != 0) {
        perror("fclose");
        exit(6);
    }
    return 0;
}
/* fseek() wrapper that exits on failure.
 *
 * Repositions `file` to `offset` relative to `origin`, then calls fflush()
 * on the stream. If either call fails, reports it with perror("fseek") and
 * exits with status 7. */
static void fseek_orDie(FILE* file, long int offset, int origin) {
    /* fflush() runs only when the seek itself succeeded. */
    if (fseek(file, offset, origin) == 0 && fflush(file) == 0) {
        return;
    }
    perror("fseek");
    exit(7);
}
static void decompressFile_orDie(const char* fname, off_t startOffset, off_t endOffset)
{
FILE* const fin = fopen_orDie(fname, "rb");
FILE* const fout = stdout;
size_t const buffOutSize = ZSTD_DStreamOutSize(); /* Guarantee to successfully flush at least one complete compressed block in all circumstances. */
void* const buffOut = malloc_orDie(buffOutSize);
ZSTD_seekable* const seekable = ZSTD_seekable_create();
if (seekable==NULL) { fprintf(stderr, "ZSTD_seekable_create() error \n"); exit(10); }
size_t const initResult = ZSTD_seekable_initFile(seekable, fin);
if (ZSTD_isError(initResult)) { fprintf(stderr, "ZSTD_seekable_init() error : %s \n", ZSTD_getErrorName(initResult)); exit(11); }
while (startOffset < endOffset) {
size_t const result = ZSTD_seekable_decompress(seekable, buffOut, MIN(endOffset - startOffset, buffOutSize), startOffset);
if (!result) {
break;
}
if (ZSTD_isError(result)) {
fprintf(stderr, "ZSTD_seekable_decompress() error : %s \n",
ZSTD_getErrorName(result));
exit(12);
}
fwrite_orDie(buffOut, result, fout);
startOffset += result;
}
ZSTD_seekable_free(seekable);
fclose_orDie(fin);
fclose_orDie(fout);
free(buffOut);
}
/* Entry point of the decompression example.
 *
 * Usage: <exe> FILE START END
 * Parses START and END with atoll() and passes them, with FILE, to
 * decompressFile_orDie().
 *
 * Returns 0 after the call, or 1 after printing the usage text to stderr when
 * the argument count is wrong. */
int main(int argc, const char** argv)
{
    const char* const programName = argv[0];

    if (argc == 4) {
        const char* const archiveName = argv[1];
        off_t const rangeBegin = atoll(argv[2]);
        off_t const rangeEnd = atoll(argv[3]);

        decompressFile_orDie(archiveName, rangeBegin, rangeEnd);
        return 0;
    }

    fprintf(stderr, "wrong arguments\n");
    fprintf(stderr, "usage:\n");
    fprintf(stderr, "%s FILE START END\n", programName);
    return 1;
}

Some files were not shown because too many files have changed in this diff Show more