diff --git a/bin/run-tc-ldc93s1_checkpoint_bytes.sh b/bin/run-tc-ldc93s1_checkpoint_bytes.sh
new file mode 100755
index 00000000..d6fe98e9
--- /dev/null
+++ b/bin/run-tc-ldc93s1_checkpoint_bytes.sh
@@ -0,0 +1,46 @@
+#!/bin/sh
+# Smoke test: rerun bytes-output-mode training on the LDC93S1 sample against the
+# checkpoint directory /tmp/ckpt_bytes and check the log for a checkpoint load.
+
+set -xe
+
+ldc93s1_dir="./data/smoke_test"
+ldc93s1_csv="${ldc93s1_dir}/ldc93s1.csv"
+resume_log="/tmp/resume.log"
+
+# Import the sample only when the CSV is not already present.
+if [ ! -f "${ldc93s1_csv}" ]; then
+    echo "Downloading and preprocessing LDC93S1 example data, saving in ${ldc93s1_dir}."
+    python -u bin/import_ldc93s1.py "${ldc93s1_dir}"
+fi
+
+# Force only one visible device because we have a single-sample dataset
+# and when trying to run on multiple devices (like GPUs), this will break
+export CUDA_VISIBLE_DEVICES=0
+
+# POSIX sh has no `pipefail`: `python ... | tee` yields tee's exit status, so
+# `set -e` never saw a failed run. Capture stdout in the log, keep python's
+# status, then replay the log (output is shown after the run, not streamed).
+train_status=0
+python -u DeepSpeech.py --noshow_progressbar --noearly_stop \
+  --train_files "${ldc93s1_csv}" --train_batch_size 1 \
+  --dev_files "${ldc93s1_csv}" --dev_batch_size 1 \
+  --test_files "${ldc93s1_csv}" --test_batch_size 1 \
+  --n_hidden 100 --epochs 1 \
+  --max_to_keep 1 --checkpoint_dir '/tmp/ckpt_bytes' --bytes_output_mode \
+  --learning_rate 0.001 --dropout_rate 0.05 \
+  --scorer_path 'data/smoke_test/pruned_lm.bytes.scorer' > "${resume_log}" || train_status=$?
+cat "${resume_log}"
+
+if [ "${train_status}" -ne 0 ]; then
+  echo "Training run failed with exit status ${train_status}"
+  exit "${train_status}"
+fi
+
+# Treat the run as resumed only if the expected log line is present.
+if ! grep "Loading best validating checkpoint from" "${resume_log}"; then
+  echo "Did not resume training from checkpoint"
+  exit 1
+fi
+
+exit 0
diff --git a/bin/run-tc-ldc93s1_new_bytes.sh b/bin/run-tc-ldc93s1_new_bytes.sh
new file mode 100755
index 00000000..5ce787d3
--- /dev/null
+++ b/bin/run-tc-ldc93s1_new_bytes.sh
@@ -0,0 +1,37 @@
+#!/bin/sh
+# Smoke test: run bytes-output-mode training on the LDC93S1 sample, keeping
+# checkpoints in /tmp/ckpt_bytes and exporting to /tmp/train_bytes.
+#
+# Usage: run-tc-ldc93s1_new_bytes.sh <epoch_count> <audio_sample_rate>
+
+set -xe
+
+ldc93s1_dir="./data/smoke_test"
+ldc93s1_csv="${ldc93s1_dir}/ldc93s1.csv"
+
+# Fail early with a usage hint: an unset argument would expand to nothing and
+# leave --epochs / --audio_sample_rate without a value.
+epoch_count="${1:?usage: $0 <epoch_count> <audio_sample_rate>}"
+audio_sample_rate="${2:?usage: $0 <epoch_count> <audio_sample_rate>}"
+
+# Import the sample only when the CSV is not already present.
+if [ ! -f "${ldc93s1_csv}" ]; then
+    echo "Downloading and preprocessing LDC93S1 example data, saving in ${ldc93s1_dir}."
+    python -u bin/import_ldc93s1.py "${ldc93s1_dir}"
+fi
+
+# Force only one visible device because we have a single-sample dataset
+# and when trying to run on multiple devices (like GPUs), this will break
+export CUDA_VISIBLE_DEVICES=0
+
+python -u DeepSpeech.py --noshow_progressbar --noearly_stop \
+  --train_files "${ldc93s1_csv}" --train_batch_size 1 \
+  --feature_cache '/tmp/ldc93s1_cache' \
+  --dev_files "${ldc93s1_csv}" --dev_batch_size 1 \
+  --test_files "${ldc93s1_csv}" --test_batch_size 1 \
+  --n_hidden 100 --epochs "${epoch_count}" \
+  --max_to_keep 1 --checkpoint_dir '/tmp/ckpt_bytes' \
+  --learning_rate 0.001 --dropout_rate 0.05 --export_dir '/tmp/train_bytes' \
+  --scorer_path 'data/smoke_test/pruned_lm.bytes.scorer' \
+  --audio_sample_rate "${audio_sample_rate}" \
+  --bytes_output_mode
diff --git a/bin/run-tc-ldc93s1_new_bytes_tflite.sh b/bin/run-tc-ldc93s1_new_bytes_tflite.sh
new file mode 100755
index 00000000..f1a79f12
--- /dev/null
+++ b/bin/run-tc-ldc93s1_new_bytes_tflite.sh
@@ -0,0 +1,33 @@
+#!/bin/sh
+# Smoke test: run DeepSpeech.py with --export_tflite against the checkpoint
+# directory /tmp/ckpt_bytes, exporting to /tmp/train_bytes_tflite.
+#
+# Usage: run-tc-ldc93s1_new_bytes_tflite.sh <audio_sample_rate>
+
+set -xe
+
+ldc93s1_dir="./data/smoke_test"
+ldc93s1_csv="${ldc93s1_dir}/ldc93s1.csv"
+
+# Fail early with a usage hint: an unset argument would expand to nothing and
+# leave --audio_sample_rate without a value.
+audio_sample_rate="${1:?usage: $0 <audio_sample_rate>}"
+
+# Import the sample only when the CSV is not already present.
+if [ ! -f "${ldc93s1_csv}" ]; then
+    echo "Downloading and preprocessing LDC93S1 example data, saving in ${ldc93s1_dir}."
+    python -u bin/import_ldc93s1.py "${ldc93s1_dir}"
+fi
+
+# Force only one visible device because we have a single-sample dataset
+# and when trying to run on multiple devices (like GPUs), this will break
+export CUDA_VISIBLE_DEVICES=0
+
+python -u DeepSpeech.py --noshow_progressbar \
+  --n_hidden 100 \
+  --checkpoint_dir '/tmp/ckpt_bytes' \
+  --export_dir '/tmp/train_bytes_tflite' \
+  --scorer_path 'data/smoke_test/pruned_lm.bytes.scorer' \
+  --bytes_output_mode \
+  --audio_sample_rate "${audio_sample_rate}" \
+  --export_tflite
diff --git a/data/smoke_test/pruned_lm.bytes.scorer b/data/smoke_test/pruned_lm.bytes.scorer new file mode 100644 index 00000000..ff87060e Binary files /dev/null and b/data/smoke_test/pruned_lm.bytes.scorer differ diff --git a/data/smoke_test/vocab.pruned.bytes.txt b/data/smoke_test/vocab.pruned.bytes.txt new file mode 100644 index 00000000..4c4c80cb --- /dev/null +++ b/data/smoke_test/vocab.pruned.bytes.txt @@ -0,0 +1,3540 @@ +s p o t +w o r d +j a u n t y +n e a r e r +h e a v y +b e l l +f l i n t ' s +m o r a l i s t +r e s o l v e d +e i g h t h +e u r o p e a n +m o u t h +m i s s u s +m o s s +p a r t y +p a l e +m i l l +c e l t s +d i s p e n s e d +f r a n k l y +s y m p a t h y +m a d +f l a t t e r e d +d e v i l s +v o m i t +c o n t i n u e d +l e a v e +p h i l o s o p h y +i n d e m n i t y +w a i t e d +n e t +t e s t e d +s a x o n +p r o t e c t i v e +g l i t t e r i n +p r e v i o u s +d e a d +l e a r n +f o r t h +l e t t e r +c a r e s +a b o v e +e x c e l l e n c e s +f l a u b e r t +g r a m m o n t +e m p l o y m e n t s +p r e p a r a t o r y +e x h a u s t e d +g r a v e l y +v o l t a i r e +f i f t e e n +i n t i m a c y +r e a s o n a b l y +m i r e +e g g s +h u m b l e +s o m e t h i n g +d a m a g e +p o e t r y +m i n g l e +l o w +s t i c k +v +c o v e r l e s s +f e l l +m e t +s i l e n t +c a s t s +t r o t h +o n l y +l i v e d +u s +r e a s o n i n g s +g a i t +s e v e n t h +h u m b u g +s t r i v i
n g +h a b i t +g e n e r a l +t a k e n +a t t r a c t e d +d r a i n e d +w o r t h y +s e c r e t +a r r i v e +o f f +c l o u d s +h a n d +t h e m +i n g e n u u s +i n e v i t a b l e +e a g e r l y +m e l o d y +c u n n i n g +v o l u n t a r i l y +g o l d +b l o o d +t h a n +c o n s c i e n c e +b r e a k i n g +n a t u r e +c o l o r +a t t i t u d e +w h e r e +d i s p o s e s +s t o r e r o o m +i m p e r f e c t +a n g e r +a +s y s t e m a t i c a l l y +r e l i e v e +p a c k e d +p l e a s u r e +f l a t t e r i e s +s l u r +a c c e p t a n c e s +p e c u l i a r +b e s t o w e d +l a b y r i n t h +a r r i v e d +v e n t u r e d +s o c i e t y +a f f a i r s +a f t e r n o o n +w h e e l s +p r i n c e +c h i m e +e a c h +b e a t s +d i s t e m p e r e d +n a t u r a l l y +p e r s o n a l +r e p u t a t i o n +e v e n i n g +v a s t e +m a d a m e +l +i n s i d i o u s +c e t e r a +s m o t h e r e d +c l o t h e s +n o t i c e d +w o n d e r s +b l u e +s u g g e s t i o n +f o r r e s t +m o r n i n g +m e d i t a t e d +a r t i l l e r y +p a s s e s +i m p o s e +c e r t a i n l y +b u s i n e s s +f a t h e r s +n a y +i n t o x i c a t i o n +u n e x p e c t e d +s t r o k i n g +b u t +r e p e a t +d i s t u r b +p o s s i b l y +o h +a c c e p t +l i p s +p l e a s e +h e a r t i l y +a c q u i s i t i o n +e n j o y +a c c e n t u a t i o n +a c c o u n t e d +s w e e t +f i x e d +d e f i n i t e +v i g o r o u s l y +p r o b l e m s +f o l l o w +m a n i f e s t e d +f a s t i n +d e l a y s +d r a w n +e v e n i n g ' s +i s l a n d s +b e t w e e n +n o t w i t h s t a n d i n g +t e r r i b l y +p a s s i o n +r e a d y +s u p e r i m p o s e d +e x p e c t a t i o n s +r e l i g i o u s +r e s p e c t s +s e l f +e m o t i o n a l +m a d e +i n v a r i a b l e +c o n t e m p l a t e +e f f e c t s +i m m o r a l +r e s t e d +m a i n m a s t +s t r i n g s +d e s i g n +c a p a c i t y +a d d e d +u n l o a d +d o n ' t +g r e a 
t e r +s p e c i a l +l e f t +l e s s +o u g h t +i n e x p r e s s i v e +d r a m a +c h a r i t y +e x c u s e +f o r e i g n +o t h e r ' s +s t y l e +c o n g r a t u l a t i o n s +e n m i t y +f a i r +t h i r t y +s o +t o p +d i a p h o r e s i s +f o r w a r d +s t a g e +o u t s i d e +g r e w +s c o w l +f r e e +p o r t +g r e n a d i e r +u n d e r s t a n d i n g +s t r a i n e d +s e r v i c e s +d i s a g r e e a b l e +w h e n +a n o m a l y +b l a m e +d a y s +m e r r y +c o m p l a i s a n c e +o b t a i n e d +f l u i d +m e d i u m +i ' d +p r i v i l e g e s +r a g +h a g g a r d +i n a u d i b l e +d e v o t i o n +u n c o n s c i o u s l y +r o c k +h o n e s t y ' +r o u n d +s e v e r e +a m o u n t +t a n k a r d +p a i n f u l l y +e n c l o s u r e s +s e t t l e d +p l e a s e d +y o u r s e l v e s +s h e l v e d +h o u r s +c h a s t e +h e r e d i t a r i l y +c o m e s +m a r v e l o u s +y e a +i n c o n s i s t e n c y +h a r d e s t +s t e p s +r i d d l e +s u i t +a p p l i c a t i o n s +p r o f u n d i s +s a n c t i f i e d +e x +s i n s +n o i s e +u t t e r e d +d i s c o v e r e r +n o b l e +r i d e r +c r o s s +a w o k e +h i m s e l f +m e a n s +f e l t +u n d e r s t a n d +s p a n i s h +d e f e n d i n g +e x p r e s s +s k i e s +t w i l i g h t +c o r n e r e d +p r o s p e c t +g o d s +s o e v e r +a r e +r e d o u b l e d +m a c h i n e +t w e n t y +m i x +j u d g i n g +u t m o s t +e v e n t u a l l y +e x c i t i n g +b e g +l u n c h +s e n s e s +o ' s h a n t e r +c o n s i d e r a b l e +i n f a n t r y +e d g e +c l e r i c a l +i t a l i a n +a n g e l +d r e a m s +t e n s i o n +m i r t h +f i l t e r i n +s t r u g g l e +w i l l +d e f i n e +l a d y s h i p +d ' e p i n a y +o b j e c t i v e +u n c o n s c i o u s n e s s +w e ' l l +w h o l l y +a r t i s t ' s +l o d g i n g +p i l e +n u m b e r +b e d c l o t h e s +m e a t +r o a s t e d +f i r s t l i n g s +c o m f o r t a b l e +s l 
o w +s u p e r s e d e +s h r e d +c o u r s e +p l a c e s +a g a i n s t +s e c u r i n g +p r o b l e m +s a t +l o s e +c u r e s +i n c o m e +v i s i o n s +p o n d e r o u s l y +d e p e n d s +c o n c u r r e n c e +b e g a n +s a k e +e v e ' s +d r o w n +c o n s c i e n t i o u s +w a v e s +a s s e n t +t h e n c e +f a l s e +f r e e t h i n k e r +a c q u a i n t a n c e +s t a r e +m a r k +p a c k e t +s h a f t e s b u r y +h e a r e r s +n i c e l y +s u b s t a n c e +e x t e n d s +s t a r e d +h a i r e d +w o u n d +c h a r i o t e e r +s e e +t h y +f i n g e r s +s i l l y +m u s i c a l +f i t +g l a z e d +c h a r g e +d i r e c t e d +e n c o u r a g e d +p a r i s h +d e c e i v i n g +s t a r t i n g +g a l l a n t r y +p l a i n l y +p a r d o n +m a n i f e s t +f a u l t +h a s t y +s l u m b e r +p i t e o u s l y +e v o k e +w i t h +n e a r l y +h e a r t +t h a t +m a n k i n d +d i s p o s i n g +b r i t i s h +i n t e r e s t i n g +f l o o r +p a i n +a b s o l u t e l y +h o l y +c l o s e l y +u t i l i t y +b r i s t o l +i m p o r t a n c e +r e c o v e r y +s o f t +f i g +t u p p e n c e +c e n t u r y +e x p e n s i v e +e d i t i o n +a d a m ' s +s u p p l y +s o n s +w i r e +s h i e l d s +g u i d e d +b e a t i n g +h o u r +c h e a p e r +t e n +b e l i e f +p r o v e +t h e o r y +f u l l y +j a n i u s +a m +c u r i o u s +e x h i b i t s +n o w +s a n g +t e l e g r a m +m i s u n d e r s t a n d +t h r o n e +d o c t o r ' s +y o u t h +c o n f e s s i o n s +e x p e r i m e n t s +g o l d e n n e s s +b r e a t h +b o r n e +m o d e +l a s t +a l p s +h a r d +s i c k n e s s +s u g g e s t i o n s +c o r r i d o r +h a l f +f e a t u r e s +c o n c i s e l y +d i s c u s s i n g +t h e r e ' l l +s y n t h e s i s +p r e s u m a b l y +c e r e b r a l +p u r s u i t s +a c c o r d i n g +r e p u g n a n t +w a i t +t h o r n +b r u i s e d +s t a v e +b a b y +c r o a k e d +p o w e r +h o w e v e r +p r a c 
t i c e +b a y c o n +b e f o r e +r e m e m b e r +s p e c t a t o r +b a b i e s +p r o o f +i r e l a n d +p h e n o m e n o n +d i s t i n g u i s h +l i g h t l y +c h a i n +n e a r e s t +i n f a n t +s t a t i o n +p r o d u c t +s e t +s h e e p i s h +p r a y e r +f e m a l e +c o m p r i s e d +p r o f e s s i o n +b a r b a r i t y +e x p e r i e n c e d +f o o l +u n i n t e r e s t i n g +d ' y e +n i h i l o +e x e r c i s e +o b s t r u c k t +e i g h t e e n +d i f f e r +p r i c e +u t t e r m o s t +d e s p e r a t i o n +s p e c u l a t i o n +l o u d e r +s p e c u l a t i o n s +f a k i r s +r h o n e +a m i s s +s h i p +p i a n o +r e a l m s +s e e m e d +s o l i t a r y +s c r a p i n g +c o n s i d e r e d +d o u b l e +m a y b e +s u p r e m e +w h o l e s o m e +f l o t s a m +c r e a t e d +b e d s i d e +s o w e r b y ' s +b i l l s +s u p p o s e +h o n o u r a b l e +s e n s i t i v e +m o r a l i s t s +e a s i e r +s t o o d +d i s t r i c t +a s k +b i l l y ' s +b i t t e r n e s s +w i d o w +t h o u g h t +m e d i c a l +n e x t +a r o s e +p a n t o m i m e +t r e a s u r e +p e e v i s h l y +w h a t +l i s t e n e r s +s e r v e d +h i g h l y +r e s u m e d +t h o s e +t r a v e l l i n g +r u l e +e n t r e n c h m e n t s +p a r t l y +m e r c y +d o u b t e d +g i v e n +d e e p +t a k e +g e n t l e m e n +r e f o r m a t i o n +a b b e y +c o m m u n i t y +f i n g e r +c h i c k e n +o b e d i e n c e +w i v e s +t h r o w n +c o m p o s e +u n i n t e l l i g i b l e +m a i n +c l o s e d +c l a s s i c a l +m +p u l l i n g +c o n t a g i o n +s i r +e m p t y +m i s t r e s s +s u b m i s s i v e l y +s c r u p l e +c i v i l i z a t i o n +t o b a c c o +a l m o s t +l a b e l l e d +a c c o m p a n y i n g +a b o a r d +u n d o u b t e d l y +l o f t y +s o m e t i m e s +a c q u a i n t a n c e s +l i t t l e +c r e a t o r +c o m m o d i o u s +s o m e w h a t +t w e n t i e t h +n o t i c e s +d e b t +c r e 
a t u r e +s e y f f e r t +w r i n k l e d +b o u n d l e s s +s p e c t a t o r s +b e c a u s e +w e e k +s p e c i f i e d +g o w n +c a s t l e s +w a i t i n g +t e l l s +p o p u l a r +n e c e s s a r y +c e r e m o n i a l +q u i e t l y +f r i e n d s +p r e c i s e +s u f f i c i e n c y +h o n e s t y +d e c l i n e +s c i e n t i f i c +t e r r o r +e c h o +g r i d d l e +b a r c h e s t e r +f u n d a m e n t a l l y +k e p t +p u s h i n g +d i s t r e s s i n g +u s e +t a y +c o n d u c t +i n t e n s e +p e a c e +s h o u l d +c u l t u r e s +q u i t e +l o w e r e d +p r o d i g y +s o w e r b y +d u t y +c h i l d h o o d +s t a g g e r e d +t r u t h +l a n d s c a p e +o f f e r +a c t i o n +d e a n ' s +l o o k s +w o r s h i p +p e e r a g e +c r a c k +f i r m +o u n c e s +m i s t a k e +n o t i c e a b l e +l o a d i n g +o w n +d i s m i s s e d +w h e r e ' s +w a r e +e l e v a t i o n s +e s p e c i a l l y +w r o t e +p r e s e n c e +j o u r n e y +s h a k s p e a r e +w h a t e v e r +d e n i e d +e a r +m e a d o w +a i s y +s u b j e c t +w a s t e +a p p a r a t u s +a s t u t e n e s s +p o n d e r o u s +s p l a s h +l a u g h e d +g r a n t i n g +a l t o g e t h e r +c o u g h +s e r v a n t s +l e s t +b e g i n n i n g +s m o k e +l e t t i n g +c a u s e +t r y +d i v i n e +o c c u p i e s +h e r o e s +t h e r e ' s +o n e ' s +k e e p +r e a d e r s +p o s t +b o o k s +h a r d l y +r a p i d +s l e e p y +e n d u r i n g +a f f o r d s +s l e e k +v i s c o u n t +r e a d i l y +p e r s i s t e d +r e i n s +u n f o r e s e e n +l a p +i n s t r u m e n t +d e t e r m i n e d +n o r +i n t e n t l y +d i s c o v e r +v e x a t i o n +p u p i l s +a r t i s a n s +s t a r v a t i o n +p u r c h a s e d +m o u n t e d +d a y +s i g m u n d +l a i d +g r e a t +f e w +e n t i r e l y +e x t r e m e +c o n s i d e r a b l y +p o l e m i c a l +g o d +m o r i b u s +l o w e s t +r a t h e r +e x p e c t +p o i n t +b r 
o t h +m a n ' s +a n x i o u s +i n t e r c o s t i a l s +p o n y +d r a w e r s +p h i l o s o p h e r s +n e e d s +s h o w +m e d i c a t r i x +r e f l e c t s +s n u g +c a m e +e x p e n s e +s p i r i t u a l i t y +s p r i n g +d e v o i d +d e a l i n g +s e c o n d a r y +g e t t i n g +c h e s t +k i n d +d e f e c t i v e +s t r u g g l e s +s p a i n +f a n n y +s t a l k e d +d e x t e r o u s l y +t h i n k s +y e s +b a g s +f a i n t +k n o w n +o l d +w i p e +c a l l i n g +r u n n i n +r e s p o n s i v e +r i g h t l y +f u r i o u s l y +c h a r a c t e r +p e o p l e +y e +m o u n t a i n +i m a g i n a r y +c o n t r a s t +t h i t h e r +w o r k e d +w a l k i n g +y o u n g e r +e x p e c t e d +m e c h a n i c a l +s e l l +r i d i c u l e +p e r f e c t e d +o v e r l o o k +e n t h u s i a s m +h y p n o t i s m +d o o r +f a m i l i a r +d r e a m l a n d +t h r o u g h +c a l l +t r o u s s e a u +d e l i g h t +a l o n e +d i s t r i c t s +f l o w s +b l a n k +g a s p i n g +d i s c o u r a g i n g +m o d e s t +d e s c r i b e s +t o u c h e d +u n e d u c a t e d +s c u l p t o r +e a t e r +c o l o u r e d +d e g r e e +w e r e +a r a b i n +l i s t e n i n g +t o n e d +c h a r l e s +o u t l o o k +v a i n +h e r e ' s +w r e t c h e d +d i s c o v e r i n g +s o n g s +p a g e s +r e c e i v e d +c o n g e s t i o n +b o w e d +u n i t e d +s u p p o s i t i o u s +s e a t +a d v a n t a g e o u s +w h i s p e r +l a w +t r a c t +b o b +f l o w i n g +t e r m s +h u m a n +c r a d l e +r e t u r n e d +r a c k +b a d +t i r e d +s h a p e +a s s i s t a n c e +l o n g i n g +p u n c t u a l l y +h u n d r e d +m i d d l e +d y k e s +e n e r g y +m a d n e s s +u n s e l f i s h +a b u n d a n c e +v i e w +c u r e +t e m p l e +e x a l t e d +d e v e l o p m e n t +o b j e c t +y e a r s +t e a +e n d o w e d +s p i r i t +s t a b l e s +b a g +p e r f e c t i n g +j u s t i c e +r e d r i f f +h o u s e h o l d +l a u g 
h t e r +t h r o e s +w a k i n g +l o o k +m a n y +p a s s a g e s +c a r e +t r u c k l i n g +s h r i e k e d +d i s t a n c e +h e ' d +d e s i r e +s t r a i t e n e d +s o o n +t o l d +i n t e n s e l y +f a n c i e s +p u r c h a s i n g +m a r r y +h y p n o t i z e r +c a s e +g a r d e n +m a n a g i n g +l u m p s +d e t e r m i n e +d a n g e r o u s +r e s o l v e +p e r c e i v e +l i d s +p a r s o n +l a n d +g o e s +w e n d +l o f t i e r +d o c t o r s +b e s t +w e e k s +p a r o d i s t s +p e r s o n a t e d +p l a y i n g +n o r t h w a r d +h o s t i l e +r e m a i n +c o m m e r c i a l +i ' l l +s a f f r o n +c o n d i t i o n +c a n ' t +c o m p a n i o n ' s +o p e r a s +q u e s t i o n a b l e +n a r r a t i v e +t r u n k s +s t r o k e d +s t a b l e +d e s i r a b l e +p r e y +e f f o r t s +m e a n +a r t e r y +d a u g h t e r s +s u n k e n +a m o u n t s +i m p r o p e r +e n e r g i e s +b o t h +a t t a c h +t e d i o u s n e s s +w i s h +c r e d i t e d +s a t i s +s i t t i n g +b u t t e r +a t h e n i a n +c h o r d +a b s o l u t e +s t a t u e +s m i l i n g +l e a r n e d +i n n o c e n t +h o r s e +g r a s p +q u a r t e r +c o r r e s p o n d e n t +c o u r s e d +s l e e p l e s s +w i f e ' s +s e c o n d +p o i n t e d +t r a d i t i o n a l +v i c t o r i a +b a t h +h i d +e t +f a s h i o n e d +f a t h e r +i n c h +g r a c e +m i n d +t h e n +s c a r c e l y +k i s s e s +e v e r y w h e r e +s t i p e n d +p u l s e +r e s t r a i n +s u m m e r +c l o u d +r a p i d l y +w o o d s +s i s t e r +r e m a r k +d e s p i s e +h o o k +j i m +c o n t i g u o u s +r i c h m o n d +f r a m l e y +p o p e ' s +s e c t i o n +c o u n t i n g +a l l +n o b o d y +p a s s +d o o m +s u m m o n +f e a r +i n c r e d u l o u s +d i n e +d o c u m e n t a r i l y +r e s e r v e d +p e r m i t t e d +h u m b l e s t +c h a r l o t t e +b e +d r e a m i l y +a s s u m e s +m e r e +s p o k e +q u a l i t i e s +c a p 
t a i n +d e l u s i o n s +a n c i e n t s +n o t i c e +s i g n a l +f a c i l i t a t e +a c c o r d a n c e +w i s h i n g +p r e o c c u p i e d +n o v e l +s n e e r +s u b j e c t s +i n f o r m e d +c h a n g e s +p e r f e c t l y +s p i r i t u a l i s e s +d a n c i n +b a s e d +s m o o t h +w a y s +c a n d y +t r e e s +i n +g e n i a l +e x p e r i m e n t +i n d e e d +p o r t i o n +o p e r a t i o n s +b e l o n g s +p r o s t i t u t e d +o f t e n +g r e e n +a u d a c i t y +m e s m e r ' s +u g l i f y i n g +d i f f i c u l t i e s +o b e y e d +l o v i n g +h u n t i n g +f u r n i s h e d +h y p n o t i s t s +e n t r a n c e +t e n d e r +d i v e r t +c r o u p +t h i n k +c l a s p e d +l i k e +c o n f u s i o n +c o m m i t t e d +f o l k +f o r g i v e +r o c k s +i n s i n u a t e d +g l o w i n g +c o n g e n i a l +s m i l e s +c r a f t +g r o u n d +s a m e +w o r l d +t r a n s c e n d e n t a l +p o w e r s +l o c k +c o m p l e t e l y +a s s u m e d +w h o e v e r +s l e e p +s h a m e f u l +h a r a n g u e d +s o n g +d e c l a r e d +q u e e n +b r o o m s t i c k +u p +d e g r e e s +s u b j e c t i v e +b r i g a d e d +i n s e n s i b l e +a p a r t +r e c o m m e n d +p o u n d s +e d u c a t e +r e c r u i t s +e n d e a v o u r +c o n f i d e n c e +s e e m s +e x p e r t +d o u b t l e s s +m a i l +o v e r +v o l u n t e e r s +f o o d +t h o u s a n d +d e f i n e d +c o m p e l l e d +s i d e s +g l o r y +d e v o t e e s +l e d g e +c a r r i e d +h a d n ' t +l u x u r i e s +d e v i c e +m o r a l l y +p r o f e s s e d +e x p r e s s i o n +a s s i s t +s i n g l e d +g o n e +s t r e t c h e d +l i t a n i e s +b a r b a r i a n s +p o c o c u r a n t e +a l f r e d +t e r r i b l e +b r a s s +w o r l d ' s +b o y +w a t e r ' s +b e l i e v i n g +p l a n e t s +f a s t i d i o u s +n o t +e n j o y m e n t +s p i t e +h o w +w o r k +l o u d l y +c o n c e a l e d +p h e n o m e n a +m i n u t e +t h e m ' 
s +m e e k +w e l l s +w o r s t +f a c t o r s +s p i r i t u a l +a p p e t i t e +s o l e m n +t h e r e +d o a b l e +s h a r e d +c o s t u m e +e n g r o s s +m y s t i c +w e i g h +l o s s +m o r r o w +s o u n d e d +p o k e r +v e r d u r e +f u l f i l +w i l l i n g +s i g h t +m u s k e t r y +f l e e t +c o m m e n t a r y +h o p e +w o o d +u n e g o i s t i c +i n t e n t i o n +a n c i e n t +r e l i s h e d +o c c u p i e d +t h o u g h t s +c o m m u n i t i e s +c o r r e s p o n d e n c e +p e r s o n +n o w a d a y s +r e m i t +d e s i r e d +d e v e l o p e d +s t a y e d +i n d i f f e r e n t l y +s t e a l t h i l y +e x p e r i m e n t e d +d i r e +a l e +s e a r c h i n g +m i g h t +y e l l o w e r +n a t u r a l +t e a r s +i n c r e a s e d +a g r e e d +h o m e r +r o m a n s +p r e v i o u s l y +s h e e t +a s p e c t s +s l i p p e d +n o n +r e f l e c t +i m p e r s o n a t i n g +p a n t i n g +w e t t e d +c e r e b r u m +g r o w n +a r d e n t +h a p p i e s t +p o s s i b l e +a r m s +c l e r g y m a n +r e t u r n +e n g l i s h +u n h a p p y +s h e d +m e n t a l l y +h e l v e t i u s +g e n u s +o r g a n s +p r o d u c e +w a n t s +p a i d +l e g s +v a r i o u s +g a i n s a i d +t r u s t +e l e m e n t s +p o l e +i n v e n t i n g +i n t e r e s t e d +c h a s e +n a i v e t e s +d o i n g +i t s +c u r a t e +w a g n e r ' s +n e w s b o y +b l u s h e d +l o v e l i e r +r e s p o n s e +w o r t h +f a r e +p r o m i s e +a m u s e +d o m e s t i c +s t e p +o c c a s i o n +r a g g e d +b i r d s +m u r m u r +d e c e p t i o n +m i s f o r t u n e +f a s t +h o l d i n g +p a r a d o x i c a l +o r g a n i c +i n d u c e d +p l a t f o r m +c r a y t u r e +m o r a l i s t i c +s e r m o n s +c o c k e ' s +k n o w +m e a n t i m e +s h o u l d n ' t +c e n t +e s p r i t +t u r n e d +g e n t l e m a n +p e r c e p t i o n s +i m m e d i a t e l y +a d v o c a t e s +a n n e a l e d +o r a n g e s +o n e 
+s p o k e n +p u e r +f r o m +a d d i n g +n e l l y +c o m p a r a t i v e l y +h a t +l o n e l y +r e f i n e d +a d m i r e +b u n d l e +s t a y +i s l e +e y e s +b r a i n s +t h i r d +t u r n +e n d l e s s +d e m e a n e d +g r a n t l y +f a v o u r i t e +o p i n i o n +d r e s s e d +h e a r t y +b i g +s m i l e d +d e s c r i p t i o n s +m e m o r y +f u l l +r e f u l g e n t +c o n s i d e r a t i o n s +m e t h o d +g a l i a n i +s a c r i f i c e r +s h a k e s p e a r e ' s +o v e r s h a d o w i n g +s a v o u r e d +h o n e s t +s u m n e r +v o y a g e +a h +g i l d +p a p a ' s +s e m i +i d e a +r e a l i t y +s e c r e t l y +r e m a i n e d +k n o w s +h e d o n i s m +o r d i n a r y +r e q u i r e d +b o n d +d e l u s i o n +d i s t u r b a n c e +c o m m o n l y +v i e n n a +a c q u i r i n g +p a s s e d +g r i n +p h y s i c s +c o n t r o l l e d +r e s o u n d e d +m i n d s +w a t e r +o r i e n t a l +s i x t y +i n a s m u c h +g o t +g l a d +s h e e p +e a r n s +t e w k e s b u r y +o b l i g i n g +f i e r c e l y +w r i t e +r a r e +m i g h t y +p r e t t y +r e g a r d e d +w e d d i n g +e a r n e s t +p r a c t i s e +t a s t e d +h a v e n +c a l l s +w h e t h e r +t h i s +c o a x i n g l y +f o r e s t +v i v i d +c o n v i n c e d +s p i n s t e r +m e e t +s t u p i d i t y +l o o k e d +i n d i v i d u a l +a b i l i t y +r h y m e +c h e e r f u l +n u r s e r y +a n g r i l y +s h e +a b s e n t +d a s h e d +f r a n t i c +m e m b r a n e s +m i s m a n a g i n g +c o r r e s p o n d e d +c a p t i o u s +w o t a n +s a r v e +o r +c o n d e m n i n g +m o v i n g +v e r b s +o v e r h e a r +s t a l l +d e e p e s t +s t a i r s +a p p r o p r i a t e +c a l m e r +l a u g h i n g +l y i n g +s e n s a t i o n +f e s t i v a l +c o m p l e t e +s u f f i c e +c o n f i d e +q u i e t +h a p p i l y +f a c u l t y +u n f a v o u r a b l y +w a s n ' t +t i l l +l o o k i n g +e y e l i d s +c o m f o r t 
+c l a i m +e x c l a i m e d +a p p l a u s e +r i d e s +m u t t e r e d +f o r g e t t i n g +b o b b y ' s +a c c e n t +s u g g e s t i n g +p r e b e n d a r y +s t a n d i n g +p e n c i l +w a r t h i n +r u s s i a +m u s i c +c h o s e +f u r t h e r +l u c y ' s +f r o n t +a p p e a r a n c e s +y o u +a b o u t +b r o a d +m o n t h +f i n i s h e d +a c c u s e s +p r i m e +v i c e +p r o f e s s i o n a l +b o a r d +w o n d e r f u l +i f +d i d +p e r s o n s +r a i s e +e u r o p e +f a l s e h o o d +a n g r y +a c c o m p a n i e d +r u f f l e d +f a l m o u t h +b e g g a r +s i n g l e +a m e r i c a n +u n l u c k y +i n t r o d u c t i o n +p r o p e r +a r t i b u s +c h a p s +s u s p i c i o n +s i d e +h a p p e n e d +d o w n w a r d s +o u t +r i s e n +p o s s e s s e d +t h e y ' d +p a r t i c u l a r +i n d i f f e r e n t +s a n k +d a r e s a y +c o n c e i v e +e s t a t e +h u m a n i t y +o p e n s +b a r l e y +c a t h e d r a l +s t o n e +p r o d u c e d +c h a n n e l +r e m e m b r a n c e +j a m e s +c r o w d +d r i v e +l o n d o n +w o m e n +s a n g u i n e o u s +c a u s e d +p r o l o n g i n g +p e r c e i v e d +c h a n c e +c o n j u n c t i o n +r e g r e t t i n g +p u t +w h i c h +p e r f o r m +c l a i r v o y a n c e +y e a r +s h o r t c o m i n g s +p r e j u d i c e d +e n c h a n t i n g +o b s e r v e +l a r r y +s t o p p e d +p e a s a n t r y +d i s p l a c i n g +c o n v e r s i n g +s c o r e +a l l y +b e h a v e s +a c c o r d +s t u d i e d +e x h o r t e d +b a l t i c +r e f r e s h m e n t +c i r c u m s t a n c e +k i s s +p l u n g e d +m o s t +d e l a y +g u a v a +p i c t u r e d +b i l l y +m o r e o v e r +p r e t e n t i o u s l y +t r i e d +t h e r e o f +s t i f f +b e l i e v e d +c o l d n e s s +r e s t +h e r o i s m +u p r o a r i o u s l y +e x i s t e d +p r i n c e s s e s +i m p r i n t e d +u n l i k e l y +i n t i m a t e +c h i l d r e n +s e n d +m o u n t a i n ' 
s +h e a p +b r o k e n +a c t i o n s +s y m p t o m s +f o r c e s +d o c t o r +p h r a s e +b a s e +s o r r y +g o i n g +p a s s i n g +r a n k +l a b y r i n t h s +f e e l s +m u t u a l +i n e x p r e s s i v e n e s s +e +r e p l y +e x a m p l e +c o n s e q u e n t l y +a l l o w +w h i f f +h o r r i d l y +s u r e +c h a r l a t a n +p o t +s u c h +s e n s e +n e w +a d v a n t a g e +a w a k e +f r e e d o m +c r u s a d e +t r e m b l i n g +a i n ' t +a t l a n t i c +o ' c l o c k +b o u n d +c a b i n +p r o m p t e d +m o m e n t s +s p e a k e r +s e a s o n +s i n g +f a k i r +e x p e d i e n t +p r o f i t s +p r a i s e +n i n e t e e n t h +s h i n d y +f o r e h e a d +s c o t c h +c o n s t r a i n e t h +s u p p e r +v a l h a l l a +s a c r i f i c e +h o m e r i c +w h i t e +e x p l a n a t i o n +h y p n o t i s t +s u f f i c i e n t +d e a r +s t u f f +e x p a n d +p e s s i m i s m +p a t i e n t s +n o r t h +w i c k e d +s t a r s +c o r r e s p o n d i e n c e +r e c r u i t +f e a r f u l l y +c h a p t e r +e l e g a n t +t r i c k +p r i c e s +s i x +w i n t e r +c h e a p +s t r e a m +r e v e n g e +s t e l l a r +d e a l +m o o n +b e y o n d +b e c o m i n g +r e p u l s i v e +m y s t i c a l +d e a n e r y +s m a l l +a h e a d +y e ' d +b e t t e r +s w o r d s +l i v i n g +j e l l y +b o l d e r +v e r y +p l a y e d +t r e a s u r e s +a c c e p t e d +h e c t i c +r e v e r e n d +d e s t r u c t i o n +o x e n +b r o u g h t +s p r e a d s +m o r e +i n d u l g e +v a u l t +c o m e +h e ' l l +o f f e r e d +r i d e +c o a c h +c o n s e n t e d +n ' t +g e t +s u b m i t t e d +s o u n d s +d e g r a d a t i o n +s t a l k +l a t i n +i m p e r f e c t l y +a l w a y s +b r e a k f a s t +n e i t h e r +c r i e d +s e n s i b l y +v a s t +f i e l d +p l e a s a n t +c a r r i a g e +g a v e +f a l l +i d e a s +t a x e d +l i v e s +b o n b o n s +u t i l i t a r i a n s +s e v e n t y +t i m e +m y s e l 
f +r a y s o n +m a y +c o u n t y +s u n d a y s +a r s +t h e r e f o r e +p r i e s t s +g r o u p +p r o m i s e d +c o r d i a l i t y +c o n s c i o u s n e s s +s o u l +m y s t e r i o u s +c h a n s o n s +n o n e +c a s t o n +h i g h +b o b b y +h i t h e r t o +f a t h e r ' s +c i r c u s +o u r +t h u s +n i n e t y +l i g h t +s t r o n g +l o n g +o p e r a t e s +r e v i v a l +t o l e r a b l y +r e p e a t i n g +c h a m b e r +u l t i m a t e +s o l i c i t e d +r e s p i r a t i o n +l i q u i d +m e g a n t i c +l i k e s +a n g l o +i n v i s i b l e +o p e n +m u l t i f a r i o u s n e s s +c h a o s +u n d e r +s c a l p +r e a l l y +p i n c h +r a r e r +a g r e e a b l e +o b l i g e d +d i s g u i s i n g +g e n t l y +f a i r y +r u i n +c o n c u r r e d +w r a p p e d +o f +e x e r t +v a l e s +f r e e l y +f i x +r e s p e c t a b l y +m a l i c e +m u c h +p a r t i c u l a r l y +b e t w i x t +d r a w i n g +n i g h +e s c a p e d +p h i l o s o p h i z i n g +h y p n o t i c +h y p n o t i z e d +m e a s u r e +u n d e r s t o o d +m e a n w h i l e +s u g a r +b r a v e r y +g r e a s y +g r a n d +c o o k e +t o r r e n t ' s +s u b l i m e r +p l e b e i a n +c o s t u m e s +h e a d e d +n o n s e n s e +d o e s n ' t +s t a t e +g r o w i n g +o b e i s a n c e +d r o p p e d +b e e n +f i r m l y +r e g u l a r +p e t t i n g +p r o f o u n d l y +m o t i v e s +s t o o p i n g +b e i n g s +p o s s i b i l i t y +h y b r i d +h o u s e +h a r m o n y +l a m p +v i r t u e +d i f f i c u l t +m o d e r n +h i g h w a y +u s u a l +e u d a e m o n i s m +s c h o l a r +s e n t i m e n t +s e e i n g +i n n e r +m i n e +w i n d +r e p o r t s +b a s i s +f a i l +t r a y n o r +b l e s s +l o d g e d +d i n n e r +w a l l s +s e v e r n +s a n d y +a l r e a d y +i l l +s i x t e e n +c l o s e +r e s i d e n c e +s h a r p +l a k e +t i d i n g s +h e s i t a t i n g l y +c l o t h i n g +o b s e r v a t i o n +r e c o g 
n i z e d +t r a v e r s i n g +d a u g h t e r ' s +w i t h d r e w +g e n e r a l l y +h e a v e n ' s +u n c o n s c i o u s +m u s i n g +c o n f e s s +c h a n g e +d u g +f o r g e d +d o m i n a t e +w h e r e b y +f r a n c e +l e a s t +o v e r s p r e a d +k n o w l e d g e +c o r i n t h i a n +s t r u c k +p i c t u r e +c l o t h e +a y +s t a i r +p r e s e n t +s t a t e s +s o n +d a m a s k +c o m m a n d e d +n a p l e s +i n s t a n c e +b e c a m e +s n a p p i n g +w e n t +r e +g r a d a t i o n s +t a k e s +c h i l d +k i n d n e s s +b o o k s h e l f +s t a r t +a r i s t o p h a n i c +f e e l +q u e s t i o n e d +l e t t e r s +f r e q u e n t l y +a p p e a r +r e m a i n s +s e l l i n g +h y p n o s i s +f l u n g +i r k +b a c k +w o r n +h a n o v e r i a n s +s t i m u l a t i o n +k i t c h e n +y a w l +h a p p i n e s s +h i g h e r +g o +p r o x i m i t y +c o r n i s h +h a n d l e +s p i r i t s +o f f e r s +r e l a t i o n +g a m e +q u i n c e y ' s +s p e a k i n +c o u l d +h a n d s +d e v i l r y +j u d i c i o u s l y +p o t a t o e s +t a l k e d +c l e r g y m e n +b o u g h t +e v i d e n t +b o w +a d v a n c e +c r i m e +t w o +e d u c a t e d +e a t e n +g u i l t y +t o w a r d s +o c c a s i o n s +r i d i c u l o u s +a f f o r d +i n t e l l e c t u a l l y +c o u n t e r +h a v e n ' t +e a r s +c h e e k s +f a t +n i g h t +u t i l i t a r i a n i s m +e a g e r +r i g i d +c o r n e r +a r r a n g e +s t a n d a r d +a r i s t o t l e +p u t t i n g +w a l e s +p i c k e d +w e +f a c e s +u n a b l e +i m p o r t a n t +h e a d +i n s i d e +s o l d +b e d +s h o e +d o m e +e x p e r i e n c e +b e l i e v e +t h e y +m a g n e t i z e r +c o l u m n s +p i c k +s i n +q u a c k +r e c i t e d +o m i s s i o n +f i n d s +o p e r a t o r s +a r t i s a n +w i n d f a l l s +y o r k +v a c a n t +d e r v i s h e s +f a n c i e d +a n i m a l +b o b ' s +i m p a t i e n t +h i s +p r e b e n d +p r e s 
s e d +c h e e k +h a n d i n g +m o d e s +h o g g l e s t o c k +a m o n g +c a n n o t +l a r g e +p r o f e s s i o n a l s +t h r e w +p r o c e s s +s e n t i m e n t s +d o n e +b e i n g +c o n t e n t +s e r e n e +b e a r i n g +s t a r t e d +p e r i o d +l o r d ' s +m a r t h i e u ' s +w a n d e r i n g s +i n c r e a s i n g +c l u n g +b r e a d +r e f l e c t i o n s +o t h e r w i s e +u n d e f e n d e d +s p l e n d i d +h o p e s +b e l o n g i n g +s o u n d +b r u s h y +i v o r y +e q u a l l y +s o m e o n e +o r d e r e d +s o r r a +t h r i v e s +d i s t r i b u t e +g a u n t +t a l e +u n c e a s i n g +d i s m a y +s a d +g l a s s y +a r t i c l e +l u f t o n ' s +e v e n +o ' e r c a s t s +s h a l l o w +n i n e +d i f f e r e n t +c l o t h s +m i s s +a p p e a l s +a n d r e w s +w h i p +o ' e r +f o r g o t +s o o t h i n g +w e s t +u n j u s t +g r a m m a r +o d e +w r o n g +s t r e n g t h +m a r k e d +d r i n k +g a r d e n e r +i n j u r y +e a s i e s t +p e r f o r m a n c e s +t o u c h +c o r n w a l l +m y s t e r i e s +t e l l i n g +r e t r e a t e d +w a l l +a k i n +d e e d s +i n s t e a d +t a s t e s +w r e t c h +s y s t e m s +m a t c h +l a n c e t +a f o o t +s t o n e h e n g e +s t a t i n g +s n a k e s +e x h i b i t i o n +o a r s +t o +p r e s e n t a t i o n +l u f t o n +s e a t e d +n a t i o n a l +d u e +w o r t h l e s s +s p e c u l a t e +s u p p o s e d +a v e r a g e +g r o o m +d e s i n t e r e s s e +f i r m a m e n t +c h o o s e +c o m m e n c e d +t u r f +g r i e v o u s +e n t r a p p e d +f a r m e r +e q u a l +i n t e r v a l +e v i d e n t l y +p h y s i c +s u p e r f l u i t i e s +h u m o u r +p r o v i d e +u n t i l +p r o p e r l y +d u t i e s +e x c e s s +a r t f u l +d i r e c t i o n +d e s t i n e d +s i g h +s t e a d i l y +g o o d y +i n t r o d u c e +r e t u r n s +p i l l o w +m o r a l s +p r o g r e s s e d +r i p +d i s c o o r s i n +c h e e r f u l l 
y +o p p o s e d +g r i s e l d a +d e m i +d i s t r e s s +a m b i t i o n +c u l t u r e d +s t o r y +r e s i s t +v i s i t s +s l a v e r y +d e c e i v e +p h i l a n t h r o p y +h o n o u r +d i f f e r i n g +m o u r n +f o r n i n t +d r o w n e d +d r i v e n +h u s b a n d +s a t u r d a y s +c o i n c i d e n c e +s e n a t e u r +h a l t +m a n a g e +c o r p o r a l ' s +f o u n t a i n +u n d e r g o i n g +l i f e +a l o n g +c o m p l i c a t e d +g e s t u r e +l o s t +d a n c e +p u r c h a s e r +m o r a l i z i n g +c u r i o s i t y +g o d ' s +b e a t e n +c h i e f +d e c i d e d +a p o l o g e t i c a l l y +c l a s s e s +g e n u i n e +m o t o r +s h o p +f o r t u n e +s u n s +r o a r +r o c k i n g +s u i t i n g +r e d +r e l a t i o n s h i p s +p u n i s h +t e n d e n c y +h o r a c e +f u l n e s s +a r o u n d +r e p e a t e d +p s y c h o l o g i s t +e x p r e s s l y +s i n c e +o c c a s i o n a l l y +f o u n d +e n d e a v o r s +t w i c e +o b s e r v e d +l i s t e n +e i t h e r +o m i t t e d +f r i g h t e n +c a l m l y +c a r e e r +s t e a d y +s c e n e +t e a r +f o l l o w e d +s a t i s f y +b a s k e t +s p a n g l e s +f r i n c h +t i t l e +b a c k w a r d s +d i e s +h a n d k e r c h i e f +s e d u c t i o n +o w i n g +i r r i t a b i l e +s a f e +d i s c u s s i o n s +z e a l +s c e n e s +c h a f f e d +c o a t +p a t i e n t +v i s i t +b l u n d e r s +f i t t e d +f u m e s +w h a t ' s +p u r p o s e +d o c t r i n e +g i l d e d +s a y +s h a d e d +b o x +a r r o g a n c e +r o m a n i z e d +d a m +d r a g o o n +c o n v e r s e d +a d m i r a t i o n +e x t e r n a l +a n +d r e a d f u l +c o n s o r t +h e i g h t e n i n g +s i t +g a t h e r e d +f u r y +m a s s +c a r e f u l +d i g g i n g +w o m e n ' s +r e m o t e +m i l k +g o r g e d +o l i v i a +w e l l +m a l v o l i o +b o u n d s +p i n +b u l l +c u l t u r e +d i s c u s s i o n +m i s f o r t u n e d +b r i n g +s i l v 
e r +n e a r +i t c h i n g +f i v e +a g e n t +l i k e d +t y p e +l a w y e r +p a p a +t h o u g h t f u l +p o o r +p e e p e d +w a l k e d +c l e a r +d i s c l o s e s +m o c k i n g +n a m e l y +p r e a c h e r s +o p e n e d +c l a s s +p e r s u a d e +t e m p o r i a l +w a s h i n g t o n +g r a d u a l l y +h y p n o t i z a t i o n +h u n t +o r i e n t +s o m e r s e t +s u b j u g a t i o n +p e r s p i r a t i o n +d i v e r s i o n +f o o t s t e p s +b e l o n g e d +t r e a t s +t h e +e x p r e s s e s +h e r s e l f +t e n t +l e a v i n g +s i g h t e d +h a n d i n +n o t e +t a l l +h e r o +d e a t h +p r i n c i p l e s +r e a s o n +o p e n i n g +h e r s +d a u g h t e r +s e d u c t i v e +o b +v a g u e +h a v e +a d d r e s s e d +e n d s +i n c r e a s e +m o r r i s +f i n d +c o r p o r a l +i n q u i r e +i n m o s t +f a c e +i m p a t i e n c e +f l o g g e d +l o n g e r +c r a w l e d +s u r g e d +o r i g i n a l +e n t h u s i a s t i c +o p p o s i t e +y e ' r e +h y p n o t i z i n g +s u r r o u n d e r s +p e r +w e a t h e r i n +c l u m s y +f r e s h +a c c e s s +s e v e r a l +b u y +c o n c e r n i n g +u n t r u e +a n d +s u c h l i k e +m o m e n t +f e l l o w +t h o r o u g h l y +s e n d i n g +g r o a n i n g +q u a n t i t i e s +d o m a i n +o u r s e l v e s +s a t i s f i e d +l o t +s h o r t +h o t +l o v e ' s +m i s t e r +s m i l e +t +s h a m e +n e w s p a p e r s +g o l d s m i t h ' s +b o d y +a c c o u n t a b l e +m y +d i s l i k e +f l o w e r s +s t r a n g e +a p p e r t a i n +a d v e r t i s i n g +o p p o r t u n i t i e s +m i n u t e s +u t t e r l y +s p e n t +r e s p e c t +r e l i e v e d +m o r a l i t i e s +r o c k y +a n s w e r s +d e s c r i b e +a c t i v i t i e s +f o r w a r d s +d r o p p i n +a n o t h e r +y o u n g +d e n i a l +e n j o y e d +s u d d e n l y +h e a r t i e s t +t o n e +d o z e d +s a n c t i t y +l i t e r a t u r e +n e v e r t h e l e s s +s 
u b t l e +f o u r +s u b t l e t y +a f f a i r +c o w l d +c l a d +r e l i n q u i s h e d +o p e n l y +p s y c h i c +c e n t s +l l +r e s p i r a t o r y +t a l k +a r i s t o p h a n e s +h y s t e r i c a l +e i g h t +s i x t h +i r r e g u l a r +w i f e +b i t +y e t +m o t h e r +s c a n t y +d i s i n t e r e s t e d l y +b u t c h e r ' s +c r u s a d e s +l o r d +p a t h +s p e a k i n g +g r a n d f a t h e r s +s u p p l i e d +o b e y i n g +c o m p a t i b l e +c h e r i s h +r i s e +a d d i c t e d +e f f e c t +s p r e a d +a d d i t i o n a l +p i n i n g +g o d s e n d s +e x t e n t +h e ' s +a p p r o a c h e d +a t o n i n g +w h i l e +f l o r e n t i n e +a p p e a r a n c e +s p e e d y +a p r i l +e u r o p e a n s +e y e +v a l u a t i o n s +s a t i s f a c t o r y +a r m +s c h o o l +w o r l d l y +m e r e l y +l a d i e s +d i s t u r b e d +m e d i t a t i o n s +e v e r y t h i n g +t r u s t e d +w o n ' t +r a i m e n t +a f t e r w a r d s +c r o w d e d +k n e e s +r i d i n g +p r i n c i p l e +m a t t e r +s c r e w i n g +p r o v e s +f i n a l l y +p a y +p l e a s i n g +b u t l e r +g i v e +e a r l i e r +s h o o k +p r e s e n t l y +a l b a n y +e n o r m o u s l y +s o m e +f o r m s +a f f e c t e d +g l i d i n g +i n t e n s i f i e d +w a n t +c l o s e r +g r e e k +c a l l e d +k i n g +c o n t r a d i c t o r y +a n x i o u s l y +r a i s e d +s n a r e s +r e a c h e d +w i t h o u t +a s k e d +f a s t e r +c h r i s t i a n +m e r i t s +m e l a n c h o l y +s i n c e r e +n o +a u t u m n +p r e s u m p t i o n +l o v e r +a f t e r +c r a g g s +a r t +h y p n o t i z e +i m m e a s u r a b l e +a s s u r i n g +d i l a t e +a p p l i e s +v e n t u r e +w o n d e r +s c h o o l s +u n f a l t e r i n g +s u b s e q u e n t +p u z z l e d +s o m e h o w +h o l l a n d i a +u n n u m b e r e d +o n c e +e v e r y +f o r m +c o o l e r +t o g e t h e r +p l u m s +w a l k s +a p p o r t i o n e d +t h 
o u g h +c h a r m s +r e t u r n i n g +c e n t e r +t i d e +a f t e r w a r d +b e n t h a m +s a w +d r o w s y +f i x i n g +h e r b e r t +d e s i r e s +l o v e s +s h o w e d +t h i n g s +a c r o s s +f o n d l e d +l a t e r +w i d e +s t o r m s +b l i n d +i n t e l l i g e n c e +b o d k i n s +k i n d s +h e x a m e t e r s +m a i d +e a s y +v e s s e l s +f a i x +g i n g e r b r e a d +r e d u c e +f o l l i e s +p l e n t i f u l +w i l d +a d m i t +r u n +m e e t i n g +d o l l +a g a i n +h i s t o r i c a l +k i n d l y +p o o r l y +a n s w e r e d +c o m m u n i c a t e s +m u f f +f r i e n d +f r o +p r i c k i n g +e n t e r t a i n m e n t +c a u s e s +m u n i f i c e n c e +m e +r e c u r r i n g +u p r o o t e d +g r e a t e s t +c o n c o c t i o n +c o a r s e +a u t h e n t i c +p r i d e +t w e l v e +s h o w m a n +r i n g i n g +d i d n ' t +i l l i g a n t +m i l f o r d +b a l l a d s +s t i l l +d e +c o u r a g e +o b v i o u s +i t ' s +s u c c e s s f u l +b a l a n c e +r a t e +c o m m a n d +u n c o n d i t i o n a l l y +m o r a l i t y +w h i r l e d +f a c t +h e r e a f t e r +u t t e r +c a n +l o v e +h u g +r e q u i r e s +a s i d e +t h u n d e r s t o r m +d o e s +p h r a s e s +c o n s i d e r +p r e c o c i o u s +z o l a ' s +e v e n t s +l o u r d e s +i n w a r d +c o n s t a n t l y +w h y +c a n v a s +d r i n k e r +c l e a r e d +p l a i n w a r d +m a k e +t i m b e r +s c h e m i n g +p o e t s +s l u i c e +o n e s +s i e g e +w a s h +m i l t o n +e a s t +m o r a l +w h i t h e r +e a t i n g +c e r t a i n +f i n e +a t t e n d a n c e +d e s c r i b e d +t e m p e r a m e n t +b e g g e d +r i c h e s +m e n +e l e v e n +s i m p l e +u n c l a s p +d e m o c r a t i c +p a r s o n a g e +t u n e +d r e a m +m e c h a n i s m +o t h e r +m a n i f e s t a t i o n s +n a r r o w +a t +r o b a r t s +t u r n i n g +c o n v u l s i v e +s a l t p e t r e +d e e p l y +c l e a n +d w a r f s +t e 
a c h i n g +r e l i e f +a b l e +p a p e r s +c u r a c y +h a m m e r +p r o u d l y +m e d i o c r i t y +v i r t u e s +a n y h o w +h a r d n e s s +h a v i n g +d i s t u r b s +t e n d +r e a d +s a c r e t +d i s c i p l i n e +o f f i c e +l i b e r a l +s o f t l y +p r o t e c t +m o t l e y +c o m f o r t s +h a i r +c o n t r a r y +s e n s a t i o n s +c r e d i t +u p o n +f i n a l +t i n g l i n g +m a s k +s p i r i t u a l i s i n g +a i r +r e s p e c t a b l e +g r e y +l e d +b a r e +c i r c u m s t a n c e s +g a l l o n +e n g a g e d +h e a r t h s t o n e +g o o d n e s s +c o n t r o l +f o o l s +y e l l o w +i s l a n d +h e a r +b r o t h e r ' s +p l a c e d +a m u s e d +j u s t +p r o f o u n d +n u m b +a c c i d e n t a l +b l e s s e d +w i g s +s e a r c h +c h a r l a t a n i s m +c r a c k l e d +h e +a s k i n g +p r o t e s t +a b s e n c e +g a z e +t e n d e r e d +b u r s t s +p a r t +d i s c o u r a g e m e n t +i n c i d e n t a l l y +q u o t e +f i n e r y +t a k i n g +h o l d +a b o u n d i n g +s c i s s o r s +w e a k e r +e x p l a i n +t o o +m i s f o r t u n e s +s h i p s +d e m o n i a c a l +b e n t +l e t +r i g h t +s o r t +w i s h e d +t h i n g +p r e s e n t s +g l i b l y +t i s +s h i n e +o r d e r +f a i l e d +l a y +c h i e f l y +e x p r e s s e d +s e e n +s c a r r e d +i m p o s s i b l e +r e g a r d s +d e a f e n i n g +l a u g h +r o o m +d a n d y +a c c o u n t s +f o r m e r l y +i m p e r s o n a t i o n s +m o o d +p o p e +h e i r e s s +a c r e s +n e w s +w o m a n +c o u r t i n g +j o v e +a c c o u n t +w a n d e r e d +l a t t e r +a d o p t e d +w o u l d n ' t +s t r i v e +a f e a r ' d +b a d l y +c o u n t e d +i n v o l u n t a r i l y +t h e i r +g i r l s +j u m p e d +s t r i c t e s t +h e a l e r s +h a s +v i r g i n i a +a p p e a r s +c u t +r e d u c e d +b l i s t e r +r e m e m b e r e d +p r o m o t i o n +i n j u r e d +c l a i m s +f r o n t a l +n o r 
m a l +h e a l t h +p r o t e s t e d +b o l d l y +s p e n d i n g +m o o r i s h +r e l i g i o n +b e c o m e s +f u t u r e +s e x +h u r r y +s e m i c i r c l e +f o o t +e x p e r i e n c e s +f e e d +n u r s e s +s i t u a t i o n +c l i n g +c o u r t s h i p +g e r m a n s +p r o p e r t y +d i s a z e +a w a y +p r e v a i l e d +v o i c e +s o n ' s +u s e f u l n e s s +m o v e d +p u c k ' s +s p r u n g +r e a l i s t i c +n i n e t e e n +s u n k +e a s i l y +p e r q u i s i t e s +g e r m a n y +f e e l i n g +p r e c e d e n t +i n f i n i t e l y +b r o t h e r +r e a c h i n g +w i n e +i n f l u e n c e +i s n ' t +e n e m i e s +p e r f o r m e r s +p e r f o r m a n c e +e f f i c i e n t +o u t w a r d +e x c e e d i n g l y +c h u r c h +c o n c e a l m e n t +a n y t h i n g +c o m m e n t +c a n e +u n c o m m o n +g r a n d e u r +t r u l y +b o o k +d r i n k i n g +t i l l a g e +s a m p l e +l o v e l y +r u n n i n g +s a x o n s +w a s +e n t e r e d +a s t o n i s h m e n t +b r e a k +i r i s h +p o e t ' s +w i t n e s s +m e n t i o n e d +g r u m b l i n g +m a n n e r +r e t i r e m e n t +c a s u a l +s u p e r h u m a n +s a y s +s i c k +n a t u r e s +m a s t e r ' s +h u n t e r +i n s t r u c t i o n s +i m m e d i a t e +r e s u l t s +h e r +c h a i r +a c t +i n v e n t i v e n e s s +s w e d e n b o r g +m a k i n g +b o t t l e s +h o u s e s +p o e t +d a r k +l i e +i l l n e s s +a g e d +l e g i t i m a t e +p u l l +s t a t e m e n t s +c o m p a n i o n +i n t e r e s t s +t h e r e b y +m i n g l i n g +s e i z i n g +r e f l e c t i o n +f r e n c h +w i s h e s +f a c i n g +t e n t s +b r i t a i n +a l s o +t e m p t e d +s u p e r +i r o n +p e a s a n t +t h i n k i n g +s w e e t h e a r t s +f o l l o w i n g +r e q u i r e +p l e a s a n t l y +m e t a p h o r +v o l t a i r e a n +s p a r e +f l o o d +d e l i c a t e +c o n f i d e n t +t h i n k e r +c o o p e r ' s +t o d a y +p r e d e c e s s 
o r +v i e w s +o p e r a t i o n +d i n +o c c u r r e d +c o n t r a c t e d +s u f f e r i n g +h e r e +i n c e n t i v e +f o r c e +b u l l s +s o l a c e +i n c o m p r e h e n s i b l e +t r a i n +n u t s +i n q u i r e d +p u c k +s p e e c h +e s t e e m +d i m i n i s h i n g +i n o r g a n i c +i n t e l l i g e n t +q u i c k l y +m u s c l e +n a m e +e n d u r e +s t r i p e d +p a t t e r n +r e q u i r e m e n t s +t a l k s +a t t e n d e d +t e m p o r a l +i g n o b l e +v o i c e s +l e a v e s +n i h i l +m a s t e r +d a n g e r +f l a m e s +b o a t +g i v e s +l i b r a r y +p e n n i l e s s +a m e r i c a +h u s h +i ' m +w o r s e +s h a m e f a c e d +g l a n c e +m a n +t a c t +d e l i b e r a t e l y +t r u e +c l o t h +m a s q u e r a d e s +d i f f i d e n t +t i r e +b r e a t h i n g +w e a t h e r +r e j e c t +p a s t +t o s s e d +l i n e +p r e c i s e l y +s t r o k e +w o r k s +u s e d +a p p e a r e d +c o n t i g i t +g i r l +r e a l +w o o i n g +d o u b t +s i m i l a r i t y +d u r i n g +t e m p e r +c +l o c k e d +p e n k n i v e s +o m n i u m +e x c i t e m e n t +i n t o +v i c t i m s +r a n d o m +s t r i p e s +d i v i n i n g +l i k e n e s s +c a p a b l e +i m p o s t o r +h e l p +w a y +s p o i l e d +p r o p h e t s +s a c r e d +e n g a g e m e n t s +s a y i n g +e x t r a o r d i n a r y +i s +p r o v i d e n c e +c r a w l e y +d r a w +o p p o r t u n i t y +i m p r o v e d +g l a s t o n b u r y +s e a +c i g a r e t t e +e x h i b i t +p r o b a b l y +b e h i n d +h o m e +s w a i n s +j u l y +c h a i n s +d i f f i c u l t y +c l a i r v o y a n t +i +f a c u l t i e s +c o m i n g +r e t a l i a t o r y +r e s p o n d e d +t i m e s +e m p l o y e d +b r o w n +g l o u c e s t e r s +l u c y +s c o r n +a u g u s t +v i c i o u s +m u s h a +c r y i n g +c a r b i n e e r +d w a r f +g u a r d +t o m o r r o w +g o o d s +r e p u l s e d +r u b b e d +o p e r a t i n g +c a s e s +g a y +s 
a i d +u n k n o w n +s u b j e c t ' s +i n f i n i t e +p r o d u c i n g +h a s t e n e d +h i m +g i v i n g +e l s e +l a d y +n e v e r +c o n c e r n +s i l e n c e +m e a n l y +a s +c h a n c e d +f i n e r +r o m a n t i c +s h o o t +t h e s e +i t s e l f +d i s p o s e +b e t r a y +s h u d d e r i n g +a r e n t s c h i l d ' s +n e e d +a r t i s a n ' s +e s s e n t i a l l y +s t o p +t o o k +c o n v i n c e +i g n o r a n c e +p a t i e n c e +p r i v i l e g e d +s h a l l +r e f i n e m e n t +d e t e r m i n e s +m o v e m e n t s +m o n e y +a c c u s t o m e d +f o o t i n g +d r e a m t +c o l o r s +r e l a t i o n s +a v o i d +d i s c e r n m e n t +s t o c k +i n t e n d e d +r e c o r d s +l i v e +r i v e r +d e v i l +a m u s i n g +f i d d l e +i m p l i e s +a n s w e r +i n t e r e s t +e m p l o y m e n t +e n g a g e m e n t +m a s s i v e +h o r s e s +s i n t r y +a w a r e +p r e f e r m e n t +u g l y +f e v e r +t h e m s e l v e s +w r i t t e n +m a k e s +c r a w l e y ' s +b e l i e v e r +n a t i v e +m o n t h ' s +l a d e n +e x t i n g u i s h +l o u d +h u s b a n d ' s +t w i r l e d +h o n o r +f u n n y +l e a n e d +m a m m a +p l a n e t +s u f f u s e d +c o m p a n i o n s +f a m i l y +i t +i n c l i n e s +a b u n d a n t +g a l l e d +h e a r d +f a r e w e l l +a u t h o r i t a t i v e +a z u r e +o p e r a t e d +s w a m p +l a b o u r +u s e f u l +c u s h m a n +r e j o i c i n g +b a n k +r e a s o n s +h a s t e +g a r m e n t +a u d i e n c e +w e a r i n e s s +t h r e e +w o v e n +h o a r s e +w h o +i n c o m e s +p a r i s +s t u p i d +s w i r l i n g +c a r g o +d e c e i t +e m p r e s s e s +a c t u a t e d +w a t e r s +m a r v e l l o u s l y +l a n g u a g e +d a s h +o n +i n v e n t i o n +g r e a t l y +s +g i r l ' s +m a n a g e d +i n t e r r o g a t i o n +n e g l e c t e d +a s s ' s +d a n e s +o p i u m +t y p e s +d o m i n a t i n g +i m p r e s s i o n +p r o v i n c i a l 
i s m +v a n i t y +r e s u l t +h e a v i l y +a r i s e s +e n d +s c h o l a r s h i p +r i c h +r e s p o n d +e n t e r t a i n i n g +t a s t e +r e p r e s e n t +c h i a j a +s u f f e r e d +s u p p o r t +c i v i l i z a t i o n s +m a t t e r s +o b j e c t e d +m a l e +y o u r +o w n e d +e x i s t s +r e s i s t e d +b r i t o n s +l i n e r +f a r +g u i d e +d i s c o v e r e d +u n w i l l i n g +c u r l y +c o t t a g e +s h a r e +o b l i g e +i n v a r i a b l y +i n s t i n c t +d i s g u s t +a p p r o v a l +o p e r a t o r +c a r e f u l l y +n o r t h m e n +w i t h i n +m o t i o n +f o r t i f i e d +s h e f f i e l d +f o l l y +m a t e r i a l +o f f e r i n g +l a n d l a d y +b a r o c c o +w e a k +t i p p e d +p u r i t a n +m e l l o w +t h r a v e l s +i m p a r t +c a r r y +c o u n t r y +p o r k +p l a c e +r +s t r a i g h t +r o s e +w h o l e +i m p a d e +p r e f e r e n c e s +m a t u r i t y +s p e a k +c o u l d n ' t +d u k e +w a n t e d +s e e m i n g l y +e n o u g h +w a r m +l a k e ' s +b r a i n +g a t h e r +m o i s t e n +f o r +p h e n o m e n o n s +a r m e d +s n o r e d +p r a c t i c a l l y +a g e +e x p l a i n e d +f a r m y a r d +h y d r a u l i c +d r e w +w o o d e d +d i s e n g a g e +c o n v e r s a t i o n +n a r r o w n e s s +h u r r i e d l y +b r o o m +p o p u l a c e +d e a f +p u r e l y +g l o r i f i c a t i o n s +h i s t o r y +c a r n i v a l +h a s t i l y +e f f o r t +s u g g e s t i v e +u n a p p r o a c h a b l e +e a r t h +i n s t i n c t i v e +b r i g h t +d i v i n i t y +s c a l e +c l a y +m u s t +s e n t +w h o s e +a b s o r b e d +h e i g h t +g a z i n g +s t r o n g e s t +s y s t e m +w a r +s u b t l e r +l i s t e n e d +u n e q u i v o c a l +b o y s +j o y +e n t e r i n g +s e l d o m +a d m i r e d +w e a r +s t o r m +f e a t s +o u t r a g e o u s +s h a k e s p e a r e +f l i n t +e v e r y b o d y +w i t c h +m a g n e t i c +a l t h o u g h +s u c c e s 
s e s +t h e e +d i l a t e d +e v i l +r o a r s +d o +r e a l i t i e s +q u e s t i o n +c u l t i v a t o r +b u s i l y +u s i n g +s h o w i n g +b r i n g i n g +d o w n +l e t h a r g y +a t t e n t i o n +r o l l +g r a t i f i c a t i o n +c o n t e m p t +s u g g e s t e d +p o c k e t +b e d r o o m +r e g a i n e d +s t a n d +b e a r +p l a u s i b l e +m e a n i n g +t e l l +o r g a n +b r o k e +p u r s e +s o n s y +b a r +p u b l i c +h a l c y o n +c o n n e c t i o n +f o r c i n g +s i n k i n g +d i s g u i s e d +e v e r +e s t a b l i s h e d +e m p e r o r s +a s l e e p +y o u ' r e +s l i g h t +r e p l i e d +b o n h o m m e +l e s s o n +d i e d +d e l i g h t e d +f r e q u e n t +c a s t +f i l l e d +p r o b a b i l i t y +c o v e r e d +h a d +u n u s u a l +r i d +f a s h i o n +f i r e +e m b o d i m e n t +u n f o r t u n a t e l y +p e r f e c t i o n +d e s e r v e +f e e t +a t t a c k e d +f i r s t +r e a d i n g +b y +a u t h o r i t y +c r y +c o c k e +c o l o u r s +c o l d +m o n u m e n t s +s u m +s e r i e s +s e p a r a t e +r a r e l y +n o t h i n g +b l e n d e d +b o x w o o d +f e l l o w s +w o r d s +e n g l a n d +a f r a i d +c o o l i n g +t o n g u e +m a t r i m o n y +r a c e s +s w e l l +w i d +a n y +d i s t i n g u i s h e d +m a g n e t i s m +w a l k +a c k n o w l e d g e d +s u c c e s s i o n +w h i s p e r e d +b l e e d i n g +w o u l d +m e m b e r +d i s s a t i s f a c t i o n +t h r o w +i n t e l l e c t u a l +t a b l e +c h i l d r e n ' s +t r y i n g +b e g i n n i n +r e l u c t a n c e +g r o s s l y +g o o d +i m a g i n a t i o n +b e c o m e +b e a u t i f u l +g u n t e r +o t h e r s +f l a t t e r +m a s c u l i n e +t r o u b l e +b e h a v e d +p e r h a p s +m i t i g a t e d +f a l t e r e d +h a p p y +w a i s t +k n e w +t a m +c o u n t e n a n c e +l o d g i n g s +c o a l s +c o m p u l s i o n +e a r l y +s u r p r i s e d +d i m i n i s h e d +s h o w s +a g o +a 
c k n o w l e d g m e n t +p e d a n t +p r i v i l e g e +e n c h a n t e d +b a n t e r +p r o b a b l e +p r a y +d e s t i n y +p e r v e r s i t y +i n c u m b e n t +r h i n e +l i n e s +b u r i e d +w a s t e d +b a t h e d +s e v e n +c o a s t +s e c o n d l y +r o s e t t e +v e r s e s +p r e p a r e d +s w e e t h e a r t +s t e r n +c r e a t i v e +a p p r e c i a t e +c a u t i o u s +w h o m +d e l e c t u s +h a n d s o m e +m a r r i e d +y i e l d i n g +k i n g ' s +r e m i t t e d +s h o r e +f i e l d s +f u l l e s t +d i m +d e s c r i p t i o n +c o m p a n y +l u c k y +m i d +b e e r +c o n s c i o u s +d e c r e a s e d +p r e a c h e d +i n c l u d i n g +p r o t e c t e d +t h a t ' s +f o r m e r +m e a n t +j o i n t \ No newline at end of file diff --git a/doc/Decoder.rst b/doc/Decoder.rst index c335c317..da974bc4 100644 --- a/doc/Decoder.rst +++ b/doc/Decoder.rst @@ -42,7 +42,7 @@ Bytes output mode **Note**: Currently, Bytes output mode makes assumptions that hold for Chinese Mandarin models but do not hold for other language targets, such as not predicting spaces. -In bytes output mode the model predicts UTF-8 bytes directly instead of letters from an alphabet file. This idea was proposed in the paper `Bytes Are All You Need `_. This mode is enabled with the ``--utf8`` flag at training and export time. At training time, the alphabet file is not used. Instead, the model is forced to have 256 labels, with labels 0-254 corresponding to UTF-8 byte values 1-255, and label 255 is used for the CTC blank symbol. If using an external scorer at decoding time, it MUST be built according to the instructions that follow. +In bytes output mode the model predicts UTF-8 bytes directly instead of letters from an alphabet file. This idea was proposed in the paper `Bytes Are All You Need `_. This mode is enabled with the ``--bytes_output_mode`` flag at training and export time. At training time, the alphabet file is not used. 
Instead, the model is forced to have 256 labels, with labels 0-254 corresponding to UTF-8 byte values 1-255, and label 255 is used for the CTC blank symbol. If using an external scorer at decoding time, it MUST be built according to the instructions that follow. Bytes output mode can be useful for languages with very large alphabets, such as Mandarin written with Simplified Chinese characters. It may also be useful for building multi-language models, or as a base for transfer learning. Currently these cases are untested and unsupported. Note that bytes output mode makes assumptions that hold for Mandarin written with Simplified Chinese characters and may not hold for other languages. @@ -58,11 +58,11 @@ corresponds to the following three "words", or UTF-8 byte sequences: At decoding time, the scorer is queried every time a Unicode codepoint is predicted, instead of when a space character is predicted. From the language modeling perspective, this is a character based model. From the implementation perspective, this is a word based model, because each character is composed of multiple labels. -**Acoustic models trained with ``--utf8`` MUST NOT be used with an alphabet based scorer. Conversely, acoustic models trained with an alphabet file MUST NOT be used with a UTF-8 scorer.** +**Acoustic models trained with ``--bytes_output_mode`` MUST NOT be used with an alphabet based scorer. Conversely, acoustic models trained with an alphabet file MUST NOT be used with a UTF-8 scorer.** UTF-8 scorers can be built by using an input corpus with space separated codepoints. If your corpus only contains single codepoints separated by spaces, ``generate_scorer_package`` should automatically enable bytes output mode, and it should print the message "Looks like a character based model." -If the message "Doesn't look like a character based model." is printed, you should double check your inputs to make sure it only contains single codepoints separated by spaces. 
Bytes output mode can be forced by specifying the ``--force_utf8`` flag when running ``generate_scorer_package``, but it is NOT RECOMMENDED. +If the message "Doesn't look like a character based model." is printed, you should double check your inputs to make sure it only contains single codepoints separated by spaces. Bytes output mode can be forced by specifying the ``--force_bytes_output_mode`` flag when running ``generate_scorer_package``, but it is NOT RECOMMENDED. See :ref:`scorer-scripts` for more details on using ``generate_scorer_package``. diff --git a/native_client/ctcdecode/__init__.py b/native_client/ctcdecode/__init__.py index 94e03b15..80edc51d 100644 --- a/native_client/ctcdecode/__init__.py +++ b/native_client/ctcdecode/__init__.py @@ -1,7 +1,6 @@ from __future__ import absolute_import, division, print_function from . import swigwrapper # pylint: disable=import-self -from .swigwrapper import UTF8Alphabet # This module is built with SWIG_PYTHON_STRICT_BYTE_CHAR so we must handle # string encoding explicitly, here and throughout this file. @@ -89,6 +88,56 @@ class Alphabet(swigwrapper.Alphabet): return res.decode('utf-8') +class UTF8Alphabet(swigwrapper.UTF8Alphabet): + """Convenience wrapper for Alphabet which calls init in the constructor""" + def __init__(self): + super(UTF8Alphabet, self).__init__() + err = self.init(b'') + if err != 0: + raise ValueError('UTF8Alphabet initialization failed with error code 0x{:X}'.format(err)) + + def CanEncodeSingle(self, input): + ''' + Returns true if the single character/output class has a corresponding label + in the alphabet. + ''' + return super(UTF8Alphabet, self).CanEncodeSingle(input.encode('utf-8')) + + def CanEncode(self, input): + ''' + Returns true if the entire string can be encoded into labels in this + alphabet. + ''' + return super(UTF8Alphabet, self).CanEncode(input.encode('utf-8')) + + def EncodeSingle(self, input): + ''' + Encode a single character/output class into a label. 
Character must be in + the alphabet, this method will assert that. Use `CanEncodeSingle` to test. + ''' + return super(UTF8Alphabet, self).EncodeSingle(input.encode('utf-8')) + + def Encode(self, input): + ''' + Encode a sequence of character/output classes into a sequence of labels. + Characters are assumed to always take a single Unicode codepoint. + Characters must be in the alphabet, this method will assert that. Use + `CanEncode` and `CanEncodeSingle` to test. + ''' + # Convert SWIG's UnsignedIntVec to a Python list + res = super(UTF8Alphabet, self).Encode(input.encode('utf-8')) + return [el for el in res] + + def DecodeSingle(self, input): + res = super(UTF8Alphabet, self).DecodeSingle(input) + return res.decode('utf-8') + + def Decode(self, input): + '''Decode a sequence of labels into a string.''' + res = super(UTF8Alphabet, self).Decode(input) + return res.decode('utf-8') + + def ctc_beam_search_decoder(probs_seq, alphabet, diff --git a/native_client/generate_scorer_package.cpp b/native_client/generate_scorer_package.cpp index 4486b42c..1576a744 100644 --- a/native_client/generate_scorer_package.cpp +++ b/native_client/generate_scorer_package.cpp @@ -20,7 +20,7 @@ create_package(absl::optional alphabet_path, string lm_path, string vocab_path, string package_path, - absl::optional force_utf8, + absl::optional force_bytes_output_mode, float default_alpha, float default_beta) { @@ -43,27 +43,27 @@ create_package(absl::optional alphabet_path, << (vocab_looks_char_based ? "Looks" : "Doesn't look") << " like a character based (Bytes Are All You Need) model.\n"; - if (!force_utf8.has_value()) { - force_utf8 = vocab_looks_char_based; - cerr << "--force_utf8 was not specified, using value " + if (!force_bytes_output_mode.has_value()) { + force_bytes_output_mode = vocab_looks_char_based; + cerr << "--force_bytes_output_mode was not specified, using value " << "infered from vocabulary contents: " << (vocab_looks_char_based ? 
"true" : "false") << "\n"; } - if (!force_utf8.value() && !alphabet_path.has_value()) { + if (!force_bytes_output_mode.value() && !alphabet_path.has_value()) { cerr << "No --alphabet file specified, not using bytes output mode, can't continue.\n"; return 1; } Scorer scorer; - if (force_utf8.value()) { + if (force_bytes_output_mode.value()) { scorer.set_alphabet(UTF8Alphabet()); } else { Alphabet alphabet; alphabet.init(alphabet_path->c_str()); scorer.set_alphabet(alphabet); } - scorer.set_utf8_mode(force_utf8.value()); + scorer.set_utf8_mode(force_bytes_output_mode.value()); scorer.reset_params(default_alpha, default_beta); int err = scorer.load_lm(lm_path); if (err != DS_ERR_SCORER_NO_TRIE) { @@ -96,13 +96,13 @@ main(int argc, char** argv) po::options_description desc("Options"); desc.add_options() ("help", "show help message") - ("alphabet", po::value(), "Path of alphabet file to use for vocabulary construction. Words with characters not in the alphabet will not be included in the vocabulary. Optional if using UTF-8 mode.") + ("alphabet", po::value(), "Path of alphabet file to use for vocabulary construction. Words with characters not in the alphabet will not be included in the vocabulary. Optional if using bytes output mode.") ("lm", po::value(), "Path of KenLM binary LM file. Must be built without including the vocabulary (use the -v flag). See generate_lm.py for how to create a binary LM.") ("vocab", po::value(), "Path of vocabulary file. Must contain words separated by whitespace.") ("package", po::value(), "Path to save scorer package.") ("default_alpha", po::value(), "Default value of alpha hyperparameter (float).") ("default_beta", po::value(), "Default value of beta hyperparameter (float).") - ("force_utf8", po::value(), "Boolean flag, force set or unset UTF-8 mode in the scorer package. If not set, infers from the vocabulary. 
See for further explanation.") + ("force_bytes_output_mode", po::value(), "Boolean flag, force set or unset bytes output mode in the scorer package. If not set, infers from the vocabulary. See for further explanation.") ; po::variables_map vm; @@ -122,10 +122,10 @@ main(int argc, char** argv) } } - // Parse optional --force_utf8 - absl::optional force_utf8 = absl::nullopt; - if (vm.count("force_utf8")) { - force_utf8 = vm["force_utf8"].as(); + // Parse optional --force_bytes_output_mode + absl::optional force_bytes_output_mode = absl::nullopt; + if (vm.count("force_bytes_output_mode")) { + force_bytes_output_mode = vm["force_bytes_output_mode"].as(); } // Parse optional --alphabet @@ -138,7 +138,7 @@ main(int argc, char** argv) vm["lm"].as(), vm["vocab"].as(), vm["package"].as(), - force_utf8, + force_bytes_output_mode, vm["default_alpha"].as(), vm["default_beta"].as()); diff --git a/taskcluster/tc-all-utils.sh b/taskcluster/tc-all-utils.sh index 2e8d0d76..3f877c5a 100755 --- a/taskcluster/tc-all-utils.sh +++ b/taskcluster/tc-all-utils.sh @@ -98,6 +98,7 @@ download_data() ${WGET} -P "${TASKCLUSTER_TMP_DIR}" "${model_source_mmap}" cp ${DS_ROOT_TASK}/DeepSpeech/ds/data/smoke_test/*.wav ${TASKCLUSTER_TMP_DIR}/ cp ${DS_ROOT_TASK}/DeepSpeech/ds/data/smoke_test/pruned_lm.scorer ${TASKCLUSTER_TMP_DIR}/kenlm.scorer + cp ${DS_ROOT_TASK}/DeepSpeech/ds/data/smoke_test/pruned_lm.bytes.scorer ${TASKCLUSTER_TMP_DIR}/kenlm.bytes.scorer cp -R ${DS_ROOT_TASK}/DeepSpeech/ds/native_client/test ${TASKCLUSTER_TMP_DIR}/test_sources } diff --git a/taskcluster/tc-cpp-bytes-ds-tests.sh b/taskcluster/tc-cpp-bytes-ds-tests.sh new file mode 100644 index 00000000..20669af6 --- /dev/null +++ b/taskcluster/tc-cpp-bytes-ds-tests.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +set -xe + +source $(dirname "$0")/tc-tests-utils.sh + +bitrate=$1 +set_ldc_sample_filename "${bitrate}" + +download_material "${TASKCLUSTER_TMP_DIR}/ds" + +export PATH=${TASKCLUSTER_TMP_DIR}/ds/:$PATH + +# Bytes output mode with LDC93S1 
takes too long to converge so we simply test +# that loading the model won't crash +check_versions diff --git a/taskcluster/tc-train-extra-tests.sh b/taskcluster/tc-train-extra-tests.sh index 62ec225e..8ecf9465 100644 --- a/taskcluster/tc-train-extra-tests.sh +++ b/taskcluster/tc-train-extra-tests.sh @@ -54,10 +54,30 @@ pushd ${HOME}/DeepSpeech/ds/ # Test --metrics_files training argument time ./bin/run-tc-ldc93s1_new_metrics.sh 2 "${sample_rate}" + + # Test training with bytes output mode + time ./bin/run-tc-ldc93s1_new_bytes.sh 200 "${sample_rate}" + time ./bin/run-tc-ldc93s1_new_bytes_tflite.sh "${sample_rate}" popd +# Save exported model artifacts from bytes output mode training +cp /tmp/train_bytes/output_graph.pb ${TASKCLUSTER_ARTIFACTS}/output_graph.pb +cp /tmp/train_bytes_tflite/output_graph.tflite ${TASKCLUSTER_ARTIFACTS}/output_graph.tflite + pushd ${HOME}/DeepSpeech/ds/ + python util/taskcluster.py --source tensorflow --artifact convert_graphdef_memmapped_format --branch r1.15 --target /tmp/ +popd + +/tmp/convert_graphdef_memmapped_format --in_graph=/tmp/train_bytes/output_graph.pb --out_graph=/tmp/train_bytes/output_graph.pbmm +cp /tmp/train_bytes/output_graph.pbmm ${TASKCLUSTER_ARTIFACTS} + +# Test resuming from checkpoints created above +pushd ${HOME}/DeepSpeech/ds/ + # SDB, resuming from checkpoint time ./bin/run-tc-ldc93s1_checkpoint_sdb.sh + + # Bytes output mode, resuming from checkpoint + time ./bin/run-tc-ldc93s1_checkpoint_bytes.sh popd virtualenv_deactivate "${pyalias}" "deepspeech" diff --git a/taskcluster/test-cpp_16k_bytes-darwin-amd64-opt.yml b/taskcluster/test-cpp_16k_bytes-darwin-amd64-opt.yml new file mode 100644 index 00000000..0b1151c1 --- /dev/null +++ b/taskcluster/test-cpp_16k_bytes-darwin-amd64-opt.yml @@ -0,0 +1,12 @@ +build: + template_file: test-darwin-opt-base.tyml + dependencies: + - "darwin-amd64-cpu-opt" + - "test-training-extra_16k-linux-amd64-py36m-opt" + - "homebrew_tests-darwin-amd64" + test_model_task: 
"test-training-extra_16k-linux-amd64-py36m-opt" + args: + tests_cmdline: "$TASKCLUSTER_TASK_DIR/DeepSpeech/ds/taskcluster/tc-cpp-bytes-ds-tests.sh 16k" + metadata: + name: "DeepSpeech OSX AMD64 CPU C++ tests (Bytes Output Model, 16kHz)" + description: "Testing DeepSpeech C++ for OSX/AMD64, CPU only, optimized version (Bytes Output Model, 16kHz)" diff --git a/taskcluster/test-cpp_16k_bytes-linux-amd64-opt.yml b/taskcluster/test-cpp_16k_bytes-linux-amd64-opt.yml new file mode 100644 index 00000000..4d483392 --- /dev/null +++ b/taskcluster/test-cpp_16k_bytes-linux-amd64-opt.yml @@ -0,0 +1,12 @@ +build: + template_file: test-linux-opt-base.tyml + dependencies: + - "linux-amd64-cpu-opt" + - "test-training-extra_16k-linux-amd64-py36m-opt" + test_model_task: "test-training-extra_16k-linux-amd64-py36m-opt" + args: + tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-cpp-bytes-ds-tests.sh 16k" + workerType: "${docker.dsTests}" + metadata: + name: "DeepSpeech Linux AMD64 CPU C++ tests (Bytes Output Model, 16kHz)" + description: "Testing DeepSpeech C++ for Linux/AMD64, CPU only, optimized version (Bytes Output Model, 16kHz)" diff --git a/training/deepspeech_training/util/config.py b/training/deepspeech_training/util/config.py index 2bd580b5..0b9929e5 100755 --- a/training/deepspeech_training/util/config.py +++ b/training/deepspeech_training/util/config.py @@ -83,7 +83,7 @@ def initialize_globals(): if not c.available_devices: c.available_devices = [c.cpu_device] - if FLAGS.utf8: + if FLAGS.bytes_output_mode: c.alphabet = UTF8Alphabet() else: c.alphabet = Alphabet(os.path.abspath(FLAGS.alphabet_config_path)) diff --git a/training/deepspeech_training/util/evaluate_tools.py b/training/deepspeech_training/util/evaluate_tools.py index e482211e..66fc8293 100644 --- a/training/deepspeech_training/util/evaluate_tools.py +++ b/training/deepspeech_training/util/evaluate_tools.py @@ -72,7 +72,7 @@ def calculate_and_print_report(wav_filenames, labels, decodings, losses, 
dataset samples.sort(key=lambda s: s.loss, reverse=True) # Then order by ascending WER/CER - if FLAGS.utf8: + if FLAGS.bytes_output_mode: samples.sort(key=lambda s: s.cer) else: samples.sort(key=lambda s: s.wer) diff --git a/training/deepspeech_training/util/flags.py b/training/deepspeech_training/util/flags.py index fe78f0b7..cf321594 100644 --- a/training/deepspeech_training/util/flags.py +++ b/training/deepspeech_training/util/flags.py @@ -156,7 +156,7 @@ def create_flags(): # Decoder - f.DEFINE_boolean('utf8', False, 'enable UTF-8 mode. When this is used the model outputs UTF-8 sequences directly rather than using an alphabet mapping.') + f.DEFINE_boolean('bytes_output_mode', False, 'enable Bytes Output Mode. When this is used the model outputs UTF-8 byte values directly rather than using an alphabet mapping. The --alphabet_config_path option will be ignored. See the training documentation for more details.') f.DEFINE_string('alphabet_config_path', 'data/alphabet.txt', 'path to the configuration file specifying the alphabet used by the network. See the comment in data/alphabet.txt for a description of the format.') f.DEFINE_string('scorer_path', '', 'path to the external scorer file.') f.DEFINE_alias('scorer', 'scorer_path')