{-# LANGUAGE CPP #-}
-----------------------------------------------------------------------------
-- |
-- Copyright   :  (c) 2006-2008 Duncan Coutts
-- License     :  BSD-style
--
-- Maintainer  :  duncan@haskell.org
-- Stability   :  provisional
-- Portability :  portable (H98 + FFI)
--
-- Pure stream based interface to lower level bzlib wrapper
--
-----------------------------------------------------------------------------
module Codec.Compression.BZip.Internal (

  -- * Compression
  compress,
  CompressParams(..),
  defaultCompressParams,

  -- * Decompression
  decompress,
  DecompressParams(..),
  defaultDecompressParams,

  -- * The compression parameter types
  Stream.BlockSize(..),
  Stream.WorkFactor(..),
  Stream.MemoryLevel(..),
  ) where

import Prelude hiding (length)
import Control.Monad (when)
import Control.Exception (assert)
import qualified Data.ByteString.Lazy as L
import qualified Data.ByteString.Lazy.Internal as L
import qualified Data.ByteString.Internal as S

import qualified Codec.Compression.BZip.Stream as Stream
import Codec.Compression.BZip.Stream (Stream)

-- | The full set of parameters for compression. The defaults are
-- 'defaultCompressParams'.
--
-- The 'compressBufferSize' is the size of the first output buffer containing
-- the compressed data. If you know an approximate upper bound on the size of
-- the compressed data then setting this parameter can save memory. The default
-- compression output buffer size is @16k@. If your extimate is wrong it does
-- not matter too much, the default buffer size will be used for the remaining
-- chunks.
--
data CompressParams = CompressParams {
  CompressParams -> BlockSize
compressBlockSize   :: Stream.BlockSize,
  CompressParams -> WorkFactor
compressWorkFactor  :: Stream.WorkFactor,
  CompressParams -> Int
compressBufferSize  :: Int
}

-- | The full set of parameters for decompression. The defaults are
-- 'defaultDecompressParams'.
--
-- The 'decompressBufferSize' is the size of the first output buffer,
-- containing the uncompressed data. If you know an exact or approximate upper
-- bound on the size of the decompressed data then setting this parameter can
-- save memory. The default decompression output buffer size is @32k@. If your
-- extimate is wrong it does not matter too much, the default buffer size will
-- be used for the remaining chunks.
--
-- One particular use case for setting the 'decompressBufferSize' is if you
-- know the exact size of the decompressed data and want to produce a strict
-- 'Data.ByteString.ByteString'. The compression and deccompression functions
-- use lazy 'Data.ByteString.Lazy.ByteString's but if you set the
-- 'decompressBufferSize' correctly then you can generate a lazy
-- 'Data.ByteString.Lazy.ByteString' with exactly one chunk, which can be
-- converted to a strict 'Data.ByteString.ByteString' in @O(1)@ time using
-- @'Data.ByteString.concat' . 'Data.ByteString.Lazy.toChunks'@.
--
data DecompressParams = DecompressParams {
  DecompressParams -> MemoryLevel
decompressMemoryLevel :: Stream.MemoryLevel,
  DecompressParams -> Int
decompressBufferSize  :: Int
}

-- | The default set of parameters for compression. This is typically used with
-- the @compressWith@ function with specific paramaters overridden.
--
defaultCompressParams :: CompressParams
defaultCompressParams :: CompressParams
defaultCompressParams = CompressParams :: BlockSize -> WorkFactor -> Int -> CompressParams
CompressParams {
  compressBlockSize :: BlockSize
compressBlockSize   = BlockSize
Stream.DefaultBlockSize,
  compressWorkFactor :: WorkFactor
compressWorkFactor  = WorkFactor
Stream.DefaultWorkFactor,
  compressBufferSize :: Int
compressBufferSize  = Int
defaultCompressBufferSize
}

-- | The default set of parameters for decompression. This is typically used with
-- the @compressWith@ function with specific paramaters overridden.
--
defaultDecompressParams :: DecompressParams
defaultDecompressParams :: DecompressParams
defaultDecompressParams = DecompressParams :: MemoryLevel -> Int -> DecompressParams
DecompressParams {
  decompressMemoryLevel :: MemoryLevel
decompressMemoryLevel = MemoryLevel
Stream.DefaultMemoryLevel,
  decompressBufferSize :: Int
decompressBufferSize  = Int
defaultDecompressBufferSize
}

-- | The default chunk sizes for the output of compression and decompression
-- are 16k and 32k respectively (less a small accounting overhead).
--
defaultCompressBufferSize, defaultDecompressBufferSize :: Int
defaultCompressBufferSize :: Int
defaultCompressBufferSize   = Int
16 Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
1024 Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
L.chunkOverhead
defaultDecompressBufferSize :: Int
defaultDecompressBufferSize = Int
32 Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
1024 Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
L.chunkOverhead

{-# NOINLINE compress #-}
compress
  :: CompressParams
  -> L.ByteString
  -> L.ByteString
compress :: CompressParams -> ByteString -> ByteString
compress (CompressParams BlockSize
blockSize WorkFactor
workFactor Int
initChunkSize) ByteString
input =
  [ByteString] -> ByteString
L.fromChunks ([ByteString] -> ByteString) -> [ByteString] -> ByteString
forall a b. (a -> b) -> a -> b
$ Stream [ByteString] -> [ByteString]
forall a. Stream a -> a
Stream.run (Stream [ByteString] -> [ByteString])
-> Stream [ByteString] -> [ByteString]
forall a b. (a -> b) -> a -> b
$ do
    BlockSize -> Verbosity -> WorkFactor -> Stream ()
Stream.compressInit BlockSize
blockSize Verbosity
Stream.Silent WorkFactor
workFactor
    case ByteString -> [ByteString]
L.toChunks ByteString
input of
      [] -> Int -> [ByteString] -> Stream [ByteString]
fillBuffers Int
14 [] --bzip2 header is 14 bytes
      S.PS ForeignPtr Word8
inFPtr Int
offset Int
length : [ByteString]
chunks -> do
        ForeignPtr Word8 -> Int -> Int -> Stream ()
Stream.pushInputBuffer ForeignPtr Word8
inFPtr Int
offset Int
length
        Int -> [ByteString] -> Stream [ByteString]
fillBuffers Int
initChunkSize [ByteString]
chunks

  where
    -- we flick between two states:
    --   * where one or other buffer is empty
    --       - in which case we refill one or both
    --   * where both buffers are non-empty
    --       - in which case we compress until a buffer is empty

  fillBuffers :: Int
              -> [S.ByteString]
              -> Stream [S.ByteString]
  fillBuffers :: Int -> [ByteString] -> Stream [ByteString]
fillBuffers Int
outChunkSize [ByteString]
inChunks = do
    Stream ()
Stream.consistencyCheck

    -- in this state there are two possabilities:
    --   * no outbut buffer space is available
    --       - in which case we must make more available
    --   * no input buffer is available
    --       - in which case we must supply more
    Bool
inputBufferEmpty <- Stream Bool
Stream.inputBufferEmpty
    Bool
outputBufferFull <- Stream Bool
Stream.outputBufferFull

    Bool -> Stream () -> Stream ()
forall a. (?callStack::CallStack) => Bool -> a -> a
assert (Bool
inputBufferEmpty Bool -> Bool -> Bool
|| Bool
outputBufferFull) (Stream () -> Stream ()) -> Stream () -> Stream ()
forall a b. (a -> b) -> a -> b
$ () -> Stream ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()

    Bool -> Stream () -> Stream ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when Bool
outputBufferFull (Stream () -> Stream ()) -> Stream () -> Stream ()
forall a b. (a -> b) -> a -> b
$ do
      ForeignPtr Word8
outFPtr <- IO (ForeignPtr Word8) -> Stream (ForeignPtr Word8)
forall a. IO a -> Stream a
Stream.unsafeLiftIO (Int -> IO (ForeignPtr Word8)
forall a. Int -> IO (ForeignPtr a)
S.mallocByteString Int
outChunkSize)
      ForeignPtr Word8 -> Int -> Int -> Stream ()
Stream.pushOutputBuffer ForeignPtr Word8
outFPtr Int
0 Int
outChunkSize

    if Bool
inputBufferEmpty
      then case [ByteString]
inChunks of
             [] -> [ByteString] -> Stream [ByteString]
drainBuffers []
             S.PS ForeignPtr Word8
inFPtr Int
offset Int
length : [ByteString]
inChunks' -> do
                ForeignPtr Word8 -> Int -> Int -> Stream ()
Stream.pushInputBuffer ForeignPtr Word8
inFPtr Int
offset Int
length
                [ByteString] -> Stream [ByteString]
drainBuffers [ByteString]
inChunks'
      else [ByteString] -> Stream [ByteString]
drainBuffers [ByteString]
inChunks


  drainBuffers ::
      [S.ByteString]
   -> Stream [S.ByteString]
  drainBuffers :: [ByteString] -> Stream [ByteString]
drainBuffers [ByteString]
inChunks = do

    Bool
inputBufferEmpty' <- Stream Bool
Stream.inputBufferEmpty
    Bool
outputBufferFull' <- Stream Bool
Stream.outputBufferFull
    Bool -> Stream () -> Stream ()
forall a. (?callStack::CallStack) => Bool -> a -> a
assert(Bool -> Bool
not Bool
outputBufferFull'
       Bool -> Bool -> Bool
&& ([ByteString] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [ByteString]
inChunks Bool -> Bool -> Bool
|| Bool -> Bool
not Bool
inputBufferEmpty')) (Stream () -> Stream ()) -> Stream () -> Stream ()
forall a b. (a -> b) -> a -> b
$ () -> Stream ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
    -- this invariant guarantees we can always make forward progress

    let action :: Action
action = if [ByteString] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [ByteString]
inChunks then Action
Stream.Finish else Action
Stream.Run
    Status
status <- Action -> Stream Status
Stream.compress Action
action

    case Status
status of
      Status
Stream.Ok -> do
        Bool
outputBufferFull <- Stream Bool
Stream.outputBufferFull
        if Bool
outputBufferFull
          then do (ForeignPtr Word8
outFPtr, Int
offset, Int
length) <- Stream (ForeignPtr Word8, Int, Int)
Stream.popOutputBuffer
                  [ByteString]
outChunks <- Stream [ByteString] -> Stream [ByteString]
forall a. Stream a -> Stream a
Stream.unsafeInterleave
                    (Int -> [ByteString] -> Stream [ByteString]
fillBuffers Int
defaultCompressBufferSize [ByteString]
inChunks)
                  [ByteString] -> Stream [ByteString]
forall (m :: * -> *) a. Monad m => a -> m a
return (ForeignPtr Word8 -> Int -> Int -> ByteString
S.PS ForeignPtr Word8
outFPtr Int
offset Int
length ByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
: [ByteString]
outChunks)
          else do Int -> [ByteString] -> Stream [ByteString]
fillBuffers Int
defaultCompressBufferSize [ByteString]
inChunks

      Status
Stream.StreamEnd -> do
        Bool
inputBufferEmpty <- Stream Bool
Stream.inputBufferEmpty
        Bool -> Stream () -> Stream ()
forall a. (?callStack::CallStack) => Bool -> a -> a
assert Bool
inputBufferEmpty (Stream () -> Stream ()) -> Stream () -> Stream ()
forall a b. (a -> b) -> a -> b
$ () -> Stream ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
        Int
outputBufferBytesAvailable <- Stream Int
Stream.outputBufferBytesAvailable
        if Int
outputBufferBytesAvailable Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
0
          then do (ForeignPtr Word8
outFPtr, Int
offset, Int
length) <- Stream (ForeignPtr Word8, Int, Int)
Stream.popOutputBuffer
                  Stream ()
Stream.finalise
                  [ByteString] -> Stream [ByteString]
forall (m :: * -> *) a. Monad m => a -> m a
return [ForeignPtr Word8 -> Int -> Int -> ByteString
S.PS ForeignPtr Word8
outFPtr Int
offset Int
length]
          else do Stream ()
Stream.finalise
                  [ByteString] -> Stream [ByteString]
forall (m :: * -> *) a. Monad m => a -> m a
return []


{-# NOINLINE decompress #-}
decompress
  :: DecompressParams
  -> L.ByteString
  -> L.ByteString
decompress :: DecompressParams -> ByteString -> ByteString
decompress (DecompressParams MemoryLevel
memLevel Int
initChunkSize) ByteString
input =
  [ByteString] -> ByteString
L.fromChunks ([ByteString] -> ByteString) -> [ByteString] -> ByteString
forall a b. (a -> b) -> a -> b
$ Stream [ByteString] -> [ByteString]
forall a. Stream a -> a
Stream.run (Stream [ByteString] -> [ByteString])
-> Stream [ByteString] -> [ByteString]
forall a b. (a -> b) -> a -> b
$ do
    Verbosity -> MemoryLevel -> Stream ()
Stream.decompressInit Verbosity
Stream.Silent MemoryLevel
memLevel
    case ByteString -> [ByteString]
L.toChunks ByteString
input of
      [] -> Int -> [ByteString] -> Stream [ByteString]
fillBuffers Int
4 [] --always an error anyway
      S.PS ForeignPtr Word8
inFPtr Int
offset Int
length : [ByteString]
chunks -> do
        ForeignPtr Word8 -> Int -> Int -> Stream ()
Stream.pushInputBuffer ForeignPtr Word8
inFPtr Int
offset Int
length
        Int -> [ByteString] -> Stream [ByteString]
fillBuffers Int
initChunkSize [ByteString]
chunks

  where
    -- we flick between two states:
    --   * where one or other buffer is empty
    --       - in which case we refill one or both
    --   * where both buffers are non-empty
    --       - in which case we compress until a buffer is empty

  fillBuffers :: Int
              -> [S.ByteString]
              -> Stream [S.ByteString]
  fillBuffers :: Int -> [ByteString] -> Stream [ByteString]
fillBuffers Int
outChunkSize [ByteString]
inChunks = do

    -- in this state there are two possabilities:
    --   * no outbut buffer space is available
    --       - in which case we must make more available
    --   * no input buffer is available
    --       - in which case we must supply more
    Bool
inputBufferEmpty <- Stream Bool
Stream.inputBufferEmpty
    Bool
outputBufferFull <- Stream Bool
Stream.outputBufferFull

    Bool -> Stream () -> Stream ()
forall a. (?callStack::CallStack) => Bool -> a -> a
assert (Bool
inputBufferEmpty Bool -> Bool -> Bool
|| Bool
outputBufferFull) (Stream () -> Stream ()) -> Stream () -> Stream ()
forall a b. (a -> b) -> a -> b
$ () -> Stream ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()

    Bool -> Stream () -> Stream ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when Bool
outputBufferFull (Stream () -> Stream ()) -> Stream () -> Stream ()
forall a b. (a -> b) -> a -> b
$ do
      ForeignPtr Word8
outFPtr <- IO (ForeignPtr Word8) -> Stream (ForeignPtr Word8)
forall a. IO a -> Stream a
Stream.unsafeLiftIO (Int -> IO (ForeignPtr Word8)
forall a. Int -> IO (ForeignPtr a)
S.mallocByteString Int
outChunkSize)
      ForeignPtr Word8 -> Int -> Int -> Stream ()
Stream.pushOutputBuffer ForeignPtr Word8
outFPtr Int
0 Int
outChunkSize

    if Bool
inputBufferEmpty
      then case [ByteString]
inChunks of
             [] -> [ByteString] -> Stream [ByteString]
drainBuffers []
             S.PS ForeignPtr Word8
inFPtr Int
offset Int
length : [ByteString]
inChunks' -> do
                ForeignPtr Word8 -> Int -> Int -> Stream ()
Stream.pushInputBuffer ForeignPtr Word8
inFPtr Int
offset Int
length
                [ByteString] -> Stream [ByteString]
drainBuffers [ByteString]
inChunks'
      else [ByteString] -> Stream [ByteString]
drainBuffers [ByteString]
inChunks


  drainBuffers ::
      [S.ByteString]
   -> Stream [S.ByteString]
  drainBuffers :: [ByteString] -> Stream [ByteString]
drainBuffers [ByteString]
inChunks = do

    Bool
inputBufferEmpty' <- Stream Bool
Stream.inputBufferEmpty
    Bool
outputBufferFull' <- Stream Bool
Stream.outputBufferFull
    Bool -> Stream () -> Stream ()
forall a. (?callStack::CallStack) => Bool -> a -> a
assert(Bool -> Bool
not Bool
outputBufferFull'
       Bool -> Bool -> Bool
&& ([ByteString] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [ByteString]
inChunks Bool -> Bool -> Bool
|| Bool -> Bool
not Bool
inputBufferEmpty')) (Stream () -> Stream ()) -> Stream () -> Stream ()
forall a b. (a -> b) -> a -> b
$ () -> Stream ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
    -- this invariant guarantees we can always make forward progress or at
    -- least detect premature EOF

    Status
status <- Stream Status
Stream.decompress

    case Status
status of
      Status
Stream.Ok -> do
        Bool
outputBufferFull <- Stream Bool
Stream.outputBufferFull
        if Bool
outputBufferFull
          then do (ForeignPtr Word8
outFPtr, Int
offset, Int
length) <- Stream (ForeignPtr Word8, Int, Int)
Stream.popOutputBuffer
                  [ByteString]
outChunks <- Stream [ByteString] -> Stream [ByteString]
forall a. Stream a -> Stream a
Stream.unsafeInterleave
                    (Int -> [ByteString] -> Stream [ByteString]
fillBuffers Int
defaultDecompressBufferSize [ByteString]
inChunks)
                  [ByteString] -> Stream [ByteString]
forall (m :: * -> *) a. Monad m => a -> m a
return (ForeignPtr Word8 -> Int -> Int -> ByteString
S.PS ForeignPtr Word8
outFPtr Int
offset Int
length ByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
: [ByteString]
outChunks)
          else do -- We need to detect if we ran out of input:
                  Bool
inputBufferEmpty <- Stream Bool
Stream.inputBufferEmpty
                  if Bool
inputBufferEmpty Bool -> Bool -> Bool
&& [ByteString] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [ByteString]
inChunks
                    then String -> Stream [ByteString]
forall (m :: * -> *) a. MonadFail m => String -> m a
fail String
"premature end of compressed stream"
                    else Int -> [ByteString] -> Stream [ByteString]
fillBuffers Int
defaultDecompressBufferSize [ByteString]
inChunks

      Status
Stream.StreamEnd -> do
        -- Note that there may be input bytes still available if the stream
        -- is embeded in some other data stream. Here we just silently discard
        -- any trailing data.
        Int
outputBufferBytesAvailable <- Stream Int
Stream.outputBufferBytesAvailable
        if Int
outputBufferBytesAvailable Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
0
          then do (ForeignPtr Word8
outFPtr, Int
offset, Int
length) <- Stream (ForeignPtr Word8, Int, Int)
Stream.popOutputBuffer
                  Stream ()
Stream.finalise
                  [ByteString] -> Stream [ByteString]
forall (m :: * -> *) a. Monad m => a -> m a
return [ForeignPtr Word8 -> Int -> Int -> ByteString
S.PS ForeignPtr Word8
outFPtr Int
offset Int
length]
          else do Stream ()
Stream.finalise
                  [ByteString] -> Stream [ByteString]
forall (m :: * -> *) a. Monad m => a -> m a
return []