{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE CPP #-}
module Xeno.DOM.Robust
( parse
, Node
, Content(..)
, name
, attributes
, contents
, children
) where
import Control.Monad.ST
import Control.Spork
#if MIN_VERSION_bytestring(0,11,0)
import Data.ByteString.Internal as BS (ByteString(..), plusForeignPtr)
#else
import Data.ByteString.Internal(ByteString(..))
#endif
import Data.STRef
import qualified Data.Vector.Unboxed as UV
import qualified Data.Vector.Unboxed.Mutable as UMV
import Data.Mutable(asURef, newRef, readRef, writeRef)
#if MIN_VERSION_bytestring(0,11,0)
import Foreign.Ptr (minusPtr)
import Foreign.ForeignPtr (ForeignPtr, withForeignPtr)
import System.IO.Unsafe (unsafeDupablePerformIO)
#endif
import Xeno.SAX
import Xeno.Types
import Xeno.DOM.Internal(Node(..), Content(..), name, attributes, contents, children)
parse :: ByteString -> Either XenoException Node
parse :: ByteString -> Either XenoException Node
parse ByteString
inp =
case Vector Int -> Either XenoException (Vector Int)
forall e a. Exception e => a -> Either e a
spork Vector Int
node of
Left XenoException
e -> XenoException -> Either XenoException Node
forall a b. a -> Either a b
Left XenoException
e
Right Vector Int
r ->
case Vector Int -> Maybe Node
findRootNode Vector Int
r of
Just Node
n -> Node -> Either XenoException Node
forall a b. b -> Either a b
Right Node
n
Maybe Node
Nothing -> XenoException -> Either XenoException Node
forall a b. a -> Either a b
Left XenoException
XenoExpectRootNode
where
findRootNode :: Vector Int -> Maybe Node
findRootNode Vector Int
r = Int -> Maybe Node
go Int
0
where
go :: Int -> Maybe Node
go Int
n = case Vector Int
r Vector Int -> Int -> Maybe Int
forall a. Unbox a => Vector a -> Int -> Maybe a
UV.!? Int
n of
Just Int
0x0 -> Node -> Maybe Node
forall a. a -> Maybe a
Just (ByteString -> Int -> Vector Int -> Node
Node ByteString
str Int
n Vector Int
r)
Just Int
0x1 -> Int -> Maybe Node
go (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
3)
Maybe Int
_ -> Maybe Node
forall a. Maybe a
Nothing
#if MIN_VERSION_bytestring(0,11,0)
BS ForeignPtr Word8
offset0 Int
_ = ByteString
str
#else
PS _ offset0 _ = str
#endif
str :: ByteString
str = ByteString -> ByteString
skipDoctype ByteString
inp
node :: Vector Int
node =
(forall s. ST s (Vector Int)) -> Vector Int
forall a. (forall s. ST s a) -> a
runST
(do nil <- Int -> ST s (MVector (PrimState (ST s)) Int)
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
Int -> m (MVector (PrimState m) a)
UMV.new Int
1000
vecRef <- newSTRef nil
sizeRef <- fmap asURef $ newRef 0
parentRef <- fmap asURef $ newRef 0
process Process {
#if MIN_VERSION_bytestring(0,11,0)
openF = \(BS ForeignPtr Word8
name_start Int
name_len) -> do
#else
openF = \(PS _ name_start name_len) -> do
#endif
let tag :: Int
tag = Int
0x00
tag_end :: Int
tag_end = -Int
1
index <- URef s Int -> ST s (RefElement (URef s Int))
forall c (m :: * -> *).
(MutableRef c, PrimMonad m, PrimState m ~ MCState c) =>
c -> m (RefElement c)
forall (m :: * -> *).
(PrimMonad m, PrimState m ~ MCState (URef s Int)) =>
URef s Int -> m (RefElement (URef s Int))
readRef URef s Int
sizeRef
v' <-
do v <- readSTRef vecRef
if index + 5 < UMV.length v
then pure v
else do
v' <- UMV.grow v (UMV.length v)
writeSTRef vecRef v'
return v'
tag_parent <- readRef parentRef
do writeRef parentRef index
writeRef sizeRef (index + 5)
UMV.write v' index tag
UMV.write v' (index + 1) tag_parent
UMV.write v' (index + 2) (distance name_start offset0)
UMV.write v' (index + 3) name_len
UMV.write v' (index + 4) tag_end
#if MIN_VERSION_bytestring(0,11,0)
, attrF = \(BS ForeignPtr Word8
key_start Int
key_len) (BS ForeignPtr Word8
value_start Int
value_len) -> do
#else
, attrF = \(PS _ key_start key_len) (PS _ value_start value_len) -> do
#endif
index <- URef s Int -> ST s (RefElement (URef s Int))
forall c (m :: * -> *).
(MutableRef c, PrimMonad m, PrimState m ~ MCState c) =>
c -> m (RefElement c)
forall (m :: * -> *).
(PrimMonad m, PrimState m ~ MCState (URef s Int)) =>
URef s Int -> m (RefElement (URef s Int))
readRef URef s Int
sizeRef
v' <-
do v <- readSTRef vecRef
if index + 5 < UMV.length v
then pure v
else do
v' <- UMV.grow v (UMV.length v)
writeSTRef vecRef v'
return v'
let tag = Int
0x02
do writeRef sizeRef (index + 5)
do UMV.write v' index tag
UMV.write v' (index + 1) (distance key_start offset0)
UMV.write v' (index + 2) key_len
UMV.write v' (index + 3) (distance value_start offset0)
UMV.write v' (index + 4) value_len
, endOpenF = \ByteString
_ -> () -> ST s ()
forall a. a -> ST s a
forall (m :: * -> *) a. Monad m => a -> m a
return ()
#if MIN_VERSION_bytestring(0,11,0)
, textF = \(BS ForeignPtr Word8
text_start Int
text_len) -> do
#else
, textF = \(PS _ text_start text_len) -> do
#endif
let tag :: Int
tag = Int
0x01
index <- URef s Int -> ST s (RefElement (URef s Int))
forall c (m :: * -> *).
(MutableRef c, PrimMonad m, PrimState m ~ MCState c) =>
c -> m (RefElement c)
forall (m :: * -> *).
(PrimMonad m, PrimState m ~ MCState (URef s Int)) =>
URef s Int -> m (RefElement (URef s Int))
readRef URef s Int
sizeRef
v' <-
do v <- readSTRef vecRef
if index + 3 < UMV.length v
then pure v
else do
v' <- UMV.grow v (UMV.length v)
writeSTRef vecRef v'
return v'
do writeRef sizeRef (index + 3)
do UMV.write v' index tag
UMV.write v' (index + 1) (distance text_start offset0)
UMV.write v' (index + 2) text_len
#if MIN_VERSION_bytestring(0,11,0)
, closeF = \closeTag :: ByteString
closeTag@(BS ForeignPtr Word8
_ Int
_) -> do
#else
, closeF = \closeTag@(PS s _ _) -> do
#endif
v <- STRef s (MVector s Int) -> ST s (MVector s Int)
forall s a. STRef s a -> ST s a
readSTRef STRef s (MVector s Int)
vecRef
index <- readRef sizeRef
untilM $ do
parent <- readRef parentRef
correctTag <- if parent == 0
then return True
else do
parent_name <- UMV.read v (parent + 2)
parent_len <- UMV.read v (parent + 3)
#if MIN_VERSION_bytestring(0,11,0)
let openTag = ForeignPtr Word8 -> Int -> ByteString
BS (ForeignPtr Word8 -> Int -> ForeignPtr Word8
forall a b. ForeignPtr a -> Int -> ForeignPtr b
BS.plusForeignPtr ForeignPtr Word8
offset0 Int
parent_name) Int
parent_len
#else
let openTag = PS s (parent_name+offset0) parent_len
#endif
return $ openTag == closeTag
UMV.write v (parent + 4) index
previousParent <- UMV.read v (parent + 1)
writeRef parentRef previousParent
return correctTag
#if MIN_VERSION_bytestring(0,11,0)
, cdataF = \(BS ForeignPtr Word8
cdata_start Int
cdata_len) -> do
#else
, cdataF = \(PS _ cdata_start cdata_len) -> do
#endif
let tag :: Int
tag = Int
0x03
index <- URef s Int -> ST s (RefElement (URef s Int))
forall c (m :: * -> *).
(MutableRef c, PrimMonad m, PrimState m ~ MCState c) =>
c -> m (RefElement c)
forall (m :: * -> *).
(PrimMonad m, PrimState m ~ MCState (URef s Int)) =>
URef s Int -> m (RefElement (URef s Int))
readRef URef s Int
sizeRef
v' <-
do v <- readSTRef vecRef
if index + 3 < UMV.length v
then pure v
else do
v' <- UMV.grow v (UMV.length v)
writeSTRef vecRef v'
return v'
do writeRef sizeRef (index + 3)
do UMV.write v' index tag
UMV.write v' (index + 1) (distance cdata_start offset0)
UMV.write v' (index + 2) cdata_len
} str
wet <- readSTRef vecRef
arr <- UV.unsafeFreeze wet
size <- readRef sizeRef
return (UV.unsafeSlice 0 size arr))
untilM :: Monad m => m Bool -> m ()
untilM :: forall (m :: * -> *). Monad m => m Bool -> m ()
untilM m Bool
loop = do
cond <- m Bool
loop
case cond of
Bool
True -> () -> m ()
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return ()
Bool
False -> m Bool -> m ()
forall (m :: * -> *). Monad m => m Bool -> m ()
untilM m Bool
loop
#if MIN_VERSION_bytestring(0,11,0)
minusForeignPtr :: ForeignPtr a -> ForeignPtr b -> Int
minusForeignPtr :: forall a b. ForeignPtr a -> ForeignPtr b -> Int
minusForeignPtr ForeignPtr a
fpA ForeignPtr b
fpB = IO Int -> Int
forall a. IO a -> a
unsafeDupablePerformIO (IO Int -> Int) -> IO Int -> Int
forall a b. (a -> b) -> a -> b
$
ForeignPtr a -> (Ptr a -> IO Int) -> IO Int
forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
withForeignPtr ForeignPtr a
fpA ((Ptr a -> IO Int) -> IO Int) -> (Ptr a -> IO Int) -> IO Int
forall a b. (a -> b) -> a -> b
$ \Ptr a
ptrA -> ForeignPtr b -> (Ptr b -> IO Int) -> IO Int
forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
withForeignPtr ForeignPtr b
fpB ((Ptr b -> IO Int) -> IO Int) -> (Ptr b -> IO Int) -> IO Int
forall a b. (a -> b) -> a -> b
$ \Ptr b
ptrB ->
Int -> IO Int
forall a. a -> IO a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Ptr a -> Ptr b -> Int
forall a b. Ptr a -> Ptr b -> Int
minusPtr Ptr a
ptrA Ptr b
ptrB)
distance :: ForeignPtr a -> ForeignPtr b -> Int
distance :: forall a b. ForeignPtr a -> ForeignPtr b -> Int
distance = ForeignPtr a -> ForeignPtr b -> Int
forall a b. ForeignPtr a -> ForeignPtr b -> Int
minusForeignPtr
#else
distance :: Int -> Int -> Int
distance a b = a - b
#endif