Skip to content

Commit

Permalink
Force UTF8 for all text reading / writing.
Browse files Browse the repository at this point in the history
Replaced all usages of readFile, writeFile, hGetLine, and hPutStrLn from
Prelude with versions from
[System.IO.UTF8](http://hackage.haskell.org/package/utf8-string-0.3.8/docs/System-IO-UTF8.html)

Fixes idris-lang#94

Tested with `export LANG=C`. Was able to load a unicode-containing .idr
file, compile and run it, use interactive vim stuff like
case-splitting and proof search, and could also :addproof from the repl.

Technically, only the change to readFile in Idris/Chaser.hs is necessary
to fix idris-lang#94.  The rest are mainly for consistency.  I am a little leary
of changing the output encoding to unicode when the system encoding
might not be that on the things that don't deal with idris code, e.g.
the .c, .java, .pom, etc output files, that are then run through gcc,
javac, whatever.

Also, this might address the issue fixed by idris-lang#1334, and make idris-lang#1334
redunant?
  • Loading branch information
LeifW committed Jul 1, 2014
1 parent ed971db commit 6c182c7
Show file tree
Hide file tree
Showing 13 changed files with 63 additions and 50 deletions.
5 changes: 3 additions & 2 deletions src/IRTS/CodegenC.hs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import System.Exit
import System.IO
import System.Directory
import System.FilePath ((</>), (<.>))
import qualified System.IO.UTF8 as UTF8
import Control.Monad

codegenC :: CodeGenerator
Expand Down Expand Up @@ -50,12 +51,12 @@ codegenC' defs out exec incs objs libs flags dbg
let h = concatMap toDecl (map fst bc)
let cc = concatMap (uncurry toC) bc
d <- getDataDir
mprog <- readFile (d </> "rts" </> "idris_main" <.> "c")
mprog <- UTF8.readFile (d </> "rts" </> "idris_main" <.> "c")
let cout = headers incs ++ debug dbg ++ h ++ cc ++
(if (exec == Executable) then mprog else "")
case exec of
MavenProject -> putStrLn ("FAILURE: output type not supported")
Raw -> writeFile out cout
Raw -> UTF8.writeFile out cout
_ -> do
(tmpn, tmph) <- tempfile
hPutStr tmph cout
Expand Down
5 changes: 3 additions & 2 deletions src/IRTS/CodegenJava.hs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import System.Directory
import System.Exit
import System.FilePath
import System.IO
import qualified System.IO.UTF8 as UTF8
import System.Process

-----------------------------------------------------------------------
Expand Down Expand Up @@ -97,7 +98,7 @@ generateJavaFile globalInit defs hdrs srcDir out = do
let code = either error
(prettyPrint)-- flatIndent . prettyPrint)
(evalStateT (mkCompilationUnit globalInit defs hdrs out) mkCodeGenEnv)
writeFile (javaFileName srcDir out) code
UTF8.writeFile (javaFileName srcDir out) code

pomFileName :: FilePath -> FilePath
pomFileName tgtDir = tgtDir </> "pom.xml"
Expand All @@ -106,7 +107,7 @@ generatePom :: FilePath -> -- tgt dir
FilePath -> -- output target
[String] -> -- libs
IO ()
generatePom tgtDir out libs = writeFile (pomFileName tgtDir) execPom
generatePom tgtDir out libs = UTF8.writeFile (pomFileName tgtDir) execPom
where
(Ident clsName) = either error id (mkClassName out)
execPom = pomString clsName (takeBaseName out) libs
Expand Down
11 changes: 6 additions & 5 deletions src/IRTS/CodegenJavaScript.hs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import Data.Word
import Data.Traversable hiding (mapM)
import System.IO
import System.Directory
import qualified System.IO.UTF8 as UTF8

import qualified Data.Map.Strict as M

Expand Down Expand Up @@ -394,10 +395,10 @@ codegenJS_all target definitions includes libs filename outputType = do

included <- concat <$> getIncludes includes
path <- (++) <$> getDataDir <*> (pure "/jsrts/")
idrRuntime <- readFile $ path ++ "Runtime-common.js"
tgtRuntime <- readFile $ concat [path, "Runtime", rt, ".js"]
idrRuntime <- UTF8.readFile $ path ++ "Runtime-common.js"
tgtRuntime <- UTF8.readFile $ concat [path, "Runtime", rt, ".js"]
jsbn <- if compileInfoNeedsBigInt info
then readFile $ path ++ "jsbn/jsbn.js"
then UTF8.readFile $ path ++ "jsbn/jsbn.js"
else return ""
let runtime = ( header
++ includeLibs libs
Expand All @@ -406,7 +407,7 @@ codegenJS_all target definitions includes libs filename outputType = do
++ idrRuntime
++ tgtRuntime
)
writeFile filename ( runtime
UTF8.writeFile filename ( runtime
++ concatMap compileJS opt
++ concatMap compileJS cons
++ main
Expand Down Expand Up @@ -476,7 +477,7 @@ codegenJS_all target definitions includes libs filename outputType = do
concatMap (\lib -> "var " ++ lib ++ " = require(\"" ++ lib ++"\");\n")

getIncludes :: [FilePath] -> IO [String]
getIncludes = mapM readFile
getIncludes = mapM UTF8.readFile

main :: String
main =
Expand Down
5 changes: 3 additions & 2 deletions src/IRTS/Compiler.hs
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ import System.IO
import System.Directory
import System.Environment
import System.FilePath ((</>), addTrailingPathSeparator)
import qualified System.IO.UTF8 as UTF8

compile :: Codegen -> FilePath -> Term -> Idris ()
compile codegen f tm
Expand Down Expand Up @@ -78,10 +79,10 @@ compile codegen f tm
dumpDefun <- getDumpDefun
case dumpCases of
Nothing -> return ()
Just f -> runIO $ writeFile f (showCaseTrees defs)
Just f -> runIO $ UTF8.writeFile f (showCaseTrees defs)
case dumpDefun of
Nothing -> return ()
Just f -> runIO $ writeFile f (dumpDefuns defuns)
Just f -> runIO $ UTF8.writeFile f (dumpDefuns defuns)
triple <- Idris.AbsSyntax.targetTriple
cpu <- Idris.AbsSyntax.targetCPU
optimise <- optLevel
Expand Down
3 changes: 2 additions & 1 deletion src/IRTS/DumpBC.hs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import Idris.Core.TT

import IRTS.Bytecode
import Data.List
import qualified System.IO.UTF8 as UTF8

interMap :: [a] -> [b] -> (a -> [b]) -> [b]
interMap xs y f = concat (intersperse y (map f xs))
Expand Down Expand Up @@ -73,4 +74,4 @@ serialize decls =
show name ++ ":\n" ++ interMap bcs "\n" (serializeBC 1)

dumpBC :: [(Name, SDecl)] -> String -> IO ()
dumpBC c output = writeFile output $ serialize $ map toBC c
dumpBC c output = UTF8.writeFile output $ serialize $ map toBC c
3 changes: 2 additions & 1 deletion src/Idris/Chaser.hs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import Data.Time.Clock
import Control.Monad.Trans
import Control.Monad.State
import Data.List
import qualified System.IO.UTF8 as UTF8

import Debug.Trace

Expand Down Expand Up @@ -129,7 +130,7 @@ buildTree built fp = btree [] fp
children lit f done = -- idrisCatch
do exist <- runIO $ doesFileExist f
if exist then do
file_in <- runIO $ readFile f
file_in <- runIO $ UTF8.readFile f
file <- if lit then tclift $ unlit f file_in else return file_in
(_, modules, _) <- parseImports f file
-- The chaser should never report warnings from sub-modules
Expand Down
5 changes: 3 additions & 2 deletions src/Idris/Core/Execute.hs
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ import Foreign.Ptr
#endif

import System.IO
import qualified System.IO.UTF8 as UTF8

#ifndef IDRIS_FFI
execute :: Term -> Idris Term
Expand Down Expand Up @@ -244,7 +245,7 @@ execApp env ctxt (EP _ fp _) (_:fn:_:handle:_:rest)
| fp == mkfprim,
Just (FFun "idris_readStr" _ _) <- foreignFromTT fn
= case handle of
EHandle h -> do contents <- execIO $ hGetLine h
EHandle h -> do contents <- execIO $ UTF8.hGetLine h
execApp env ctxt (EConstant (Str (contents ++ "\n"))) rest
_ -> execFail . Msg $
"The argument to idris_readStr should be a handle, but it was " ++
Expand Down Expand Up @@ -395,7 +396,7 @@ getOp fn [EP _ fn' _]
return (EConstant (Str line))
getOp fn [EHandle h]
| fn == prs =
Just $ do contents <- execIO $ hGetLine h
Just $ do contents <- execIO $ UTF8.hGetLine h
return (EConstant (Str (contents ++ "\n")))
getOp n args = getPrim n primitives >>= flip applyPrim args
where getPrim :: Name -> [Prim] -> Maybe ([ExecVal] -> Maybe ExecVal)
Expand Down
3 changes: 2 additions & 1 deletion src/Idris/IdrisDoc.hs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import qualified Data.Map as M hiding ((!))
import qualified Data.Ord (compare)
import qualified Data.Set as S
import qualified Data.Text as T
import qualified System.IO.UTF8 as UTF8

import System.IO
import System.IO.Error
Expand Down Expand Up @@ -381,7 +382,7 @@ createDocs ist nsd out =
createIndex nss out
-- Create an empty IdrisDoc file to signal 'out' is used for IdrisDoc
if new -- But only if it not already existed...
then withFile (out </> "IdrisDoc") WriteMode ((flip hPutStr) "")
then withFile (out </> "IdrisDoc") WriteMode ((flip UTF8.hPutStr) "")
else return ()
copyDependencies out
return $ Right ()
Expand Down
37 changes: 19 additions & 18 deletions src/Idris/Interactive.hs
Original file line number Diff line number Diff line change
Expand Up @@ -22,37 +22,38 @@ import Util.System

import System.FilePath
import System.Directory
import System.IO
import System.IO (Handle(..))
import Data.Char
import Data.Maybe (fromMaybe)
import qualified System.IO.UTF8 as UTF8


caseSplitAt :: Handle -> FilePath -> Bool -> Int -> Name -> Idris ()
caseSplitAt h fn updatefile l n
= do src <- runIO $ readFile fn
= do src <- runIO $ UTF8.readFile fn
res <- splitOnLine l n fn
iLOG (showSep "\n" (map show res))
let (before, (ap : later)) = splitAt (l-1) (lines src)
res' <- replaceSplits ap res
let new = concat res'
if updatefile
then do let fb = fn ++ "~" -- make a backup!
runIO $ writeFile fb (unlines before ++ new ++ unlines later)
runIO $ UTF8.writeFile fb (unlines before ++ new ++ unlines later)
runIO $ copyFile fb fn
else -- do ihputStrLn h (show res)
ihPrintResult h new

addClauseFrom :: Handle -> FilePath -> Bool -> Int -> Name -> Idris ()
addClauseFrom h fn updatefile l n
= do src <- runIO $ readFile fn
= do src <- runIO $ UTF8.readFile fn
let (before, tyline : later) = splitAt (l-1) (lines src)
let indent = getIndent 0 (show n) tyline
cl <- getClause l n fn
-- add clause before first blank line in 'later'
let (nonblank, rest) = span (not . all isSpace) (tyline:later)
if updatefile
then do let fb = fn ++ "~"
runIO $ writeFile fb (unlines (before ++ nonblank) ++
runIO $ UTF8.writeFile fb (unlines (before ++ nonblank) ++
replicate indent ' ' ++
cl ++ "\n" ++
unlines rest)
Expand All @@ -66,15 +67,15 @@ addClauseFrom h fn updatefile l n

addProofClauseFrom :: Handle -> FilePath -> Bool -> Int -> Name -> Idris ()
addProofClauseFrom h fn updatefile l n
= do src <- runIO $ readFile fn
= do src <- runIO $ UTF8.readFile fn
let (before, tyline : later) = splitAt (l-1) (lines src)
let indent = getIndent 0 (show n) tyline
cl <- getProofClause l n fn
-- add clause before first blank line in 'later'
let (nonblank, rest) = span (not . all isSpace) (tyline:later)
if updatefile
then do let fb = fn ++ "~"
runIO $ writeFile fb (unlines (before ++ nonblank) ++
runIO $ UTF8.writeFile fb (unlines (before ++ nonblank) ++
replicate indent ' ' ++
cl ++ "\n" ++
unlines rest)
Expand All @@ -87,7 +88,7 @@ addProofClauseFrom h fn updatefile l n

addMissing :: Handle -> FilePath -> Bool -> Int -> Name -> Idris ()
addMissing h fn updatefile l n
= do src <- runIO $ readFile fn
= do src <- runIO $ UTF8.readFile fn
let (before, tyline : later) = splitAt (l-1) (lines src)
let indent = getIndent 0 (show n) tyline
i <- getIState
Expand All @@ -101,7 +102,7 @@ addMissing h fn updatefile l n
let (nonblank, rest) = span (not . all isSpace) (tyline:later)
if updatefile
then do let fb = fn ++ "~"
runIO $ writeFile fb (unlines (before ++ nonblank)
runIO $ UTF8.writeFile fb (unlines (before ++ nonblank)
++ extras ++ unlines rest)
runIO $ copyFile fb fn
else ihPrintResult h extras
Expand Down Expand Up @@ -133,7 +134,7 @@ addMissing h fn updatefile l n

makeWith :: Handle -> FilePath -> Bool -> Int -> Name -> Idris ()
makeWith h fn updatefile l n
= do src <- runIO $ readFile fn
= do src <- runIO $ UTF8.readFile fn
let (before, tyline : later) = splitAt (l-1) (lines src)
let ind = getIndent tyline
let with = mkWith tyline n
Expand All @@ -143,7 +144,7 @@ makeWith h fn updatefile l n
not (ind == getIndent x)) later
if updatefile then
do let fb = fn ++ "~"
runIO $ writeFile fb (unlines (before ++ nonblank)
runIO $ UTF8.writeFile fb (unlines (before ++ nonblank)
++ with ++ "\n" ++
unlines rest)
runIO $ copyFile fb fn
Expand All @@ -156,7 +157,7 @@ doProofSearch :: Handle -> FilePath -> Bool -> Bool ->
doProofSearch h fn updatefile rec l n hints Nothing
= doProofSearch h fn updatefile rec l n hints (Just 10)
doProofSearch h fn updatefile rec l n hints (Just depth)
= do src <- runIO $ readFile fn
= do src <- runIO $ UTF8.readFile fn
let (before, tyline : later) = splitAt (l-1) (lines src)
ctxt <- getContext
mn <- case lookupNames n ctxt of
Expand Down Expand Up @@ -184,7 +185,7 @@ doProofSearch h fn updatefile rec l n hints (Just depth)
(\e -> return ("?" ++ show n))
if updatefile then
do let fb = fn ++ "~"
runIO $ writeFile fb (unlines before ++
runIO $ UTF8.writeFile fb (unlines before ++
updateMeta False tyline (show n) newmv ++ "\n"
++ unlines later)
runIO $ copyFile fb fn
Expand Down Expand Up @@ -233,7 +234,7 @@ addBracket True new | any isSpace new = '(' : new ++ ")"

makeLemma :: Handle -> FilePath -> Bool -> Int -> Name -> Idris ()
makeLemma h fn updatefile l n
= do src <- runIO $ readFile fn
= do src <- runIO $ UTF8.readFile fn
let (before, tyline : later) = splitAt (l-1) (lines src)

-- if the name is in braces, rather than preceded by a ?, treat it
Expand All @@ -256,7 +257,7 @@ makeLemma h fn updatefile l n

if updatefile then
do let fb = fn ++ "~"
runIO $ writeFile fb (addLem before tyline lem lem_app later)
runIO $ UTF8.writeFile fb (addLem before tyline lem lem_app later)
runIO $ copyFile fb fn
else case idris_outputmode i of
RawOutput -> ihPrintResult h $ lem ++ "\n" ++ lem_app
Expand All @@ -267,14 +268,14 @@ makeLemma h fn updatefile l n
StringAtom lem_app],
SexpList [SymbolAtom "definition-type",
StringAtom lem]]]
in runIO . hPutStrLn h $ convSExp "return" good n
in runIO . UTF8.hPutStrLn h $ convSExp "return" good n

else do -- provisional definition
let lem_app = show n ++ appArgs [] mty ++
" = ?" ++ show n ++ "_rhs"
if updatefile then
do let fb = fn ++ "~"
runIO $ writeFile fb (addProv before tyline lem_app later)
runIO $ UTF8.writeFile fb (addProv before tyline lem_app later)
runIO $ copyFile fb fn
else case idris_outputmode i of
RawOutput -> ihPrintResult h $ lem_app
Expand All @@ -283,7 +284,7 @@ makeLemma h fn updatefile l n
SexpList [SymbolAtom "provisional-definition-lemma",
SexpList [SymbolAtom "definition-clause",
StringAtom lem_app]]]
in runIO . hPutStrLn h $ convSExp "return" good n
in runIO . UTF8.hPutStrLn h $ convSExp "return" good n

where getIndent s = length (takeWhile isSpace s)

Expand Down
6 changes: 4 additions & 2 deletions src/Idris/Parser.hs
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,9 @@ import qualified Data.Set as S
import Debug.Trace

import System.FilePath
import System.IO
import System.IO (Handle(..))
import qualified System.IO.UTF8 as UTF8


{-
@
Expand Down Expand Up @@ -1233,7 +1235,7 @@ loadSource h lidr f toline
= do iLOG ("Reading " ++ f)
i <- getIState
let def_total = default_total i
file_in <- runIO $ readFile f
file_in <- runIO $ UTF8.readFile f
file <- if lidr then tclift $ unlit f file_in else return file_in
(mname, imports, pos) <- parseImports f file
ids <- allImportDirs
Expand Down
Loading

0 comments on commit 6c182c7

Please sign in to comment.