module CWStringBasic( -- latin1 versions peekCAString, -- :: CString -> IO String peekCAStringLen, -- :: CStringLen -> IO String newCAString, -- :: String -> IO CString newCAStringLen, -- :: String -> IO CStringLen withCAString, -- :: String -> (CString -> IO a) -> IO a withCAStringLen, -- :: String -> (CStringLen -> IO a) -> IO a -- utf8 versions withUTF8String, withUTF8StringLen, newUTF8String, newUTF8StringLen, peekUTF8String, peekUTF8StringLen ) where import Char import Bits import Foreign.C.String as S newCAStringLen :: String -> IO CStringLen newCAString :: String -> IO CString peekCAString :: CString -> IO String peekCAStringLen :: CStringLen -> IO String withCAStringLen :: String -> (CStringLen -> IO a) -> IO a withCAString :: String -> (CString -> IO a) -> IO a newCAStringLen = S.newCStringLen newCAString = S.newCString peekCAStringLen = S.peekCStringLen peekCAString = S.peekCString withCAStringLen = S.withCStringLen withCAString = S.withCString ----------------- -- UTF8 versions ----------------- newUTF8StringLen :: String -> IO CStringLen newUTF8String :: String -> IO CString peekUTF8String :: CString -> IO String peekUTF8StringLen :: CStringLen -> IO String withUTF8StringLen :: String -> (CStringLen -> IO a) -> IO a withUTF8String :: String -> (CString -> IO a) -> IO a newUTF8StringLen = newCStringLen . toUTF newUTF8String = newCString . toUTF peekUTF8StringLen strPtr = fmap fromUTF $ peekCStringLen strPtr peekUTF8String strPtr = fmap fromUTF $ peekCString strPtr withUTF8String hsStr = withCString (toUTF hsStr) withUTF8StringLen hsStr = withCStringLen (toUTF hsStr) -- these should read and write directly from/to memory. -- A first pass will be needed to determine the size of the allocated region toUTF :: String -> String toUTF [] = [] toUTF (x:xs) | ord x<=0x007F = x:toUTF xs | ord x<=0x07FF = chr (0xC0 .|. ((ord x `shift` (-6)) .&. 0x1F)): chr (0x80 .|. (ord x .&. 0x3F)): toUTF xs | otherwise = chr (0xE0 .|. ((ord x `shift` (-12)) .&. 0x0F)): chr (0x80 .|. ((ord x `shift` (-6)) .&. 0x3F)): chr (0x80 .|. (ord x .&. 0x3F)): toUTF xs fromUTF :: String -> String fromUTF [] = [] fromUTF (all@(x:xs)) | ord x<=0x7F = x:fromUTF xs | ord x<=0xBF = err | ord x<=0xDF = twoBytes all | ord x<=0xEF = threeBytes all | otherwise = err where twoBytes (x1:x2:xs) = chr (((ord x1 .&. 0x1F) `shift` 6) .|. (ord x2 .&. 0x3F)):fromUTF xs twoBytes _ = error "fromUTF: illegal two byte sequence" threeBytes (x1:x2:x3:xs) = chr (((ord x1 .&. 0x0F) `shift` 12) .|. ((ord x2 .&. 0x3F) `shift` 6) .|. (ord x3 .&. 0x3F)):fromUTF xs threeBytes _ = error "fromUTF: illegal three byte sequence" err = error "fromUTF: illegal UTF-8 character"