{-# LANGUAGE ForeignFunctionInterface #-}
-- |
-- Module      : Data.Text.ICU.Text
-- Copyright   : (c) 2010 Bryan O'Sullivan
--
-- License     : BSD-style
-- Maintainer  : bos@serpentine.com
-- Stability   : experimental
-- Portability : GHC
--
-- Functions for manipulating Unicode text, implemented as bindings to
-- the International Components for Unicode (ICU) libraries.
module Data.Text.ICU.Text
    (
    -- * Case conversion
    -- $case
      toCaseFold
    , toLower
    , toUpper
    ) where

import Data.Int (Int32)
import Data.Text (Text)
import Data.Text.Foreign (fromPtr, useAsPtr)
import Data.Text.ICU.Error.Internal (UErrorCode, handleOverflowError)
import Data.Text.ICU.Internal (LocaleName, UChar, withLocaleName)
import Data.Word (Word32)
import Foreign.C.String (CString)
import Foreign.Ptr (Ptr, castPtr)
import System.IO.Unsafe (unsafePerformIO)

-- $case
--
-- In some languages, case conversion is a locale- and
-- context-dependent operation. The case conversion functions in this
-- module are locale and context sensitive.

-- | Case-fold the characters in a string.
--
-- Case folding is locale independent and not context sensitive, but
-- there is an option for treating the letter I specially for Turkic
-- languages.  The result may be longer or shorter than the original.
toCaseFold :: Bool -- ^ Whether to include or exclude mappings for
                   -- dotted and dotless I and i that are marked with
                   -- 'I' in @CaseFolding.txt@.
           -> Text -> Text
toCaseFold :: Bool -> Text -> Text
toCaseFold Bool
excludeI Text
s = IO Text -> Text
forall a. IO a -> a
unsafePerformIO (IO Text -> Text)
-> ((Ptr UChar -> I16 -> IO Text) -> IO Text)
-> (Ptr UChar -> I16 -> IO Text)
-> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
.
  Text -> (Ptr UChar -> I16 -> IO Text) -> IO Text
forall a. Text -> (Ptr UChar -> I16 -> IO a) -> IO a
useAsPtr Text
s ((Ptr UChar -> I16 -> IO Text) -> Text)
-> (Ptr UChar -> I16 -> IO Text) -> Text
forall a b. (a -> b) -> a -> b
$ \Ptr UChar
sptr I16
slen -> do
    let opts :: Word32
opts = Int -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Word32) -> (Bool -> Int) -> Bool -> Word32
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Bool -> Int
forall a. Enum a => a -> Int
fromEnum (Bool -> Word32) -> Bool -> Word32
forall a b. (a -> b) -> a -> b
$ Bool
excludeI
    Int
-> (Ptr UChar -> Int32 -> Ptr UErrorCode -> IO Int32)
-> (Ptr UChar -> Int -> IO Text)
-> IO Text
forall a b.
Storable a =>
Int
-> (Ptr a -> Int32 -> Ptr UErrorCode -> IO Int32)
-> (Ptr a -> Int -> IO b)
-> IO b
handleOverflowError (I16 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral I16
slen)
        (\Ptr UChar
dptr Int32
dlen -> Ptr UChar
-> Int32
-> Ptr UChar
-> Int32
-> Word32
-> Ptr UErrorCode
-> IO Int32
u_strFoldCase Ptr UChar
dptr Int32
dlen Ptr UChar
sptr (I16 -> Int32
forall a b. (Integral a, Num b) => a -> b
fromIntegral I16
slen) Word32
opts)
        (\Ptr UChar
dptr Int
dlen -> Ptr UChar -> I16 -> IO Text
fromPtr (Ptr UChar -> Ptr UChar
forall a b. Ptr a -> Ptr b
castPtr Ptr UChar
dptr) (Int -> I16
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
dlen))

type CaseMapper = Ptr UChar -> Int32 -> Ptr UChar -> Int32 -> CString
                -> Ptr UErrorCode -> IO Int32

caseMap :: CaseMapper -> LocaleName -> Text -> Text
caseMap :: CaseMapper -> LocaleName -> Text -> Text
caseMap CaseMapper
mapFn LocaleName
loc Text
s = IO Text -> Text
forall a. IO a -> a
unsafePerformIO (IO Text -> Text)
-> ((CString -> IO Text) -> IO Text)
-> (CString -> IO Text)
-> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
.
  LocaleName -> (CString -> IO Text) -> IO Text
forall a. LocaleName -> (CString -> IO a) -> IO a
withLocaleName LocaleName
loc ((CString -> IO Text) -> Text) -> (CString -> IO Text) -> Text
forall a b. (a -> b) -> a -> b
$ \CString
locale ->
    Text -> (Ptr UChar -> I16 -> IO Text) -> IO Text
forall a. Text -> (Ptr UChar -> I16 -> IO a) -> IO a
useAsPtr Text
s ((Ptr UChar -> I16 -> IO Text) -> IO Text)
-> (Ptr UChar -> I16 -> IO Text) -> IO Text
forall a b. (a -> b) -> a -> b
$ \Ptr UChar
sptr I16
slen ->
      Int
-> (Ptr UChar -> Int32 -> Ptr UErrorCode -> IO Int32)
-> (Ptr UChar -> Int -> IO Text)
-> IO Text
forall a b.
Storable a =>
Int
-> (Ptr a -> Int32 -> Ptr UErrorCode -> IO Int32)
-> (Ptr a -> Int -> IO b)
-> IO b
handleOverflowError (I16 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral I16
slen)
      (\Ptr UChar
dptr Int32
dlen -> CaseMapper
mapFn Ptr UChar
dptr Int32
dlen Ptr UChar
sptr (I16 -> Int32
forall a b. (Integral a, Num b) => a -> b
fromIntegral I16
slen) CString
locale)
      (\Ptr UChar
dptr Int
dlen -> Ptr UChar -> I16 -> IO Text
fromPtr (Ptr UChar -> Ptr UChar
forall a b. Ptr a -> Ptr b
castPtr Ptr UChar
dptr) (Int -> I16
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
dlen))

-- | Lowercase the characters in a string.
--
-- Casing is locale dependent and context sensitive.  The result may
-- be longer or shorter than the original.
toLower :: LocaleName -> Text -> Text
toLower :: LocaleName -> Text -> Text
toLower = CaseMapper -> LocaleName -> Text -> Text
caseMap CaseMapper
u_strToLower

-- | Uppercase the characters in a string.
--
-- Casing is locale dependent and context sensitive.  The result may
-- be longer or shorter than the original.
toUpper :: LocaleName -> Text -> Text
toUpper :: LocaleName -> Text -> Text
toUpper = CaseMapper -> LocaleName -> Text -> Text
caseMap CaseMapper
u_strToUpper

foreign import ccall unsafe "hs_text_icu.h __hs_u_strFoldCase" u_strFoldCase
    :: Ptr UChar -> Int32 -> Ptr UChar -> Int32 -> Word32 -> Ptr UErrorCode
    -> IO Int32

foreign import ccall unsafe "hs_text_icu.h __hs_u_strToLower" u_strToLower
    :: CaseMapper

foreign import ccall unsafe "hs_text_icu.h __hs_u_strToUpper" u_strToUpper
    :: CaseMapper