Update KenLM to b9f35777d112ce2fc10bd3986302517a16dc3883
This commit is contained in:
parent
93e5ce498a
commit
d65422c8ab
3
native_client/kenlm/.gitignore
vendored
3
native_client/kenlm/.gitignore
vendored
@ -3,6 +3,9 @@ util/file_piece.cc.gz
|
|||||||
*.o
|
*.o
|
||||||
doc/
|
doc/
|
||||||
build/
|
build/
|
||||||
|
/bin
|
||||||
|
/lib
|
||||||
|
/tests
|
||||||
._*
|
._*
|
||||||
windows/Win32
|
windows/Win32
|
||||||
windows/x64
|
windows/x64
|
||||||
|
@ -12,3 +12,7 @@ If you only want the query code and do not care about compression (.gz, .bz2, an
|
|||||||
Windows:
|
Windows:
|
||||||
The windows directory has visual studio files. Note that you need to compile
|
The windows directory has visual studio files. Note that you need to compile
|
||||||
the kenlm project before build_binary and ngram_query projects.
|
the kenlm project before build_binary and ngram_query projects.
|
||||||
|
|
||||||
|
OSX:
|
||||||
|
Missing dependencies can be remedied with brew.
|
||||||
|
brew install cmake boost eigen
|
||||||
|
@ -1 +1 @@
|
|||||||
cdd794598ea15dc23a7daaf7a8cf89423c97f7e6
|
b9f35777d112ce2fc10bd3986302517a16dc3883
|
||||||
|
@ -2,9 +2,9 @@
|
|||||||
|
|
||||||
Language model inference code by Kenneth Heafield (kenlm at kheafield.com)
|
Language model inference code by Kenneth Heafield (kenlm at kheafield.com)
|
||||||
|
|
||||||
I do development in master on https://github.com/kpu/kenlm/. Normally, it works, but I do not guarantee it will compile, give correct answers, or generate non-broken binary files. For a more stable release, get http://kheafield.com/code/kenlm.tar.gz .
|
I do development in master on https://github.com/kpu/kenlm/. Normally, it works, but I do not guarantee it will compile, give correct answers, or generate non-broken binary files. For a more stable release, get https://kheafield.com/code/kenlm.tar.gz .
|
||||||
|
|
||||||
The website http://kheafield.com/code/kenlm/ has more documentation. If you're a decoder developer, please download the latest version from there instead of copying from another decoder.
|
The website https://kheafield.com/code/kenlm/ has more documentation. If you're a decoder developer, please download the latest version from there instead of copying from another decoder.
|
||||||
|
|
||||||
## Compiling
|
## Compiling
|
||||||
Use cmake, see [BUILDING](BUILDING) for more detail.
|
Use cmake, see [BUILDING](BUILDING) for more detail.
|
||||||
@ -33,7 +33,7 @@ lmplz estimates unpruned language models with modified Kneser-Ney smoothing. Af
|
|||||||
```bash
|
```bash
|
||||||
bin/lmplz -o 5 <text >text.arpa
|
bin/lmplz -o 5 <text >text.arpa
|
||||||
```
|
```
|
||||||
The algorithm is on-disk, using an amount of memory that you specify. See http://kheafield.com/code/kenlm/estimation/ for more.
|
The algorithm is on-disk, using an amount of memory that you specify. See https://kheafield.com/code/kenlm/estimation/ for more.
|
||||||
|
|
||||||
MT Marathon 2012 team members Ivan Pouzyrevsky and Mohammed Mediani contributed to the computation design and early implementation. Jon Clark contributed to the design, clarified points about smoothing, and added logging.
|
MT Marathon 2012 team members Ivan Pouzyrevsky and Mohammed Mediani contributed to the computation design and early implementation. Jon Clark contributed to the design, clarified points about smoothing, and added logging.
|
||||||
|
|
||||||
@ -43,15 +43,15 @@ filter takes an ARPA or count file and removes entries that will never be querie
|
|||||||
```bash
|
```bash
|
||||||
bin/filter
|
bin/filter
|
||||||
```
|
```
|
||||||
and see http://kheafield.com/code/kenlm/filter/ for more documentation.
|
and see https://kheafield.com/code/kenlm/filter/ for more documentation.
|
||||||
|
|
||||||
## Querying
|
## Querying
|
||||||
|
|
||||||
Two data structures are supported: probing and trie. Probing is a probing hash table with keys that are 64-bit hashes of n-grams and floats as values. Trie is a fairly standard trie but with bit-level packing so it uses the minimum number of bits to store word indices and pointers. The trie node entries are sorted by word index. Probing is the fastest and uses the most memory. Trie uses the least memory and a bit slower.
|
Two data structures are supported: probing and trie. Probing is a probing hash table with keys that are 64-bit hashes of n-grams and floats as values. Trie is a fairly standard trie but with bit-level packing so it uses the minimum number of bits to store word indices and pointers. The trie node entries are sorted by word index. Probing is the fastest and uses the most memory. Trie uses the least memory and is a bit slower.
|
||||||
|
|
||||||
As is the custom in language modeling, all probabilities are log base 10.
|
As is the custom in language modeling, all probabilities are log base 10.
|
||||||
|
|
||||||
With trie, resident memory is 58% of IRST's smallest version and 21% of SRI's compact version. Simultaneously, trie's CPU use is 81% of IRST's fastest version and 84% of SRI's fast version. KenLM's probing hash table implementation goes even faster at the expense of using more memory. See http://kheafield.com/code/kenlm/benchmark/.
|
With trie, resident memory is 58% of IRST's smallest version and 21% of SRI's compact version. Simultaneously, trie's CPU use is 81% of IRST's fastest version and 84% of SRI's fast version. KenLM's probing hash table implementation goes even faster at the expense of using more memory. See https://kheafield.com/code/kenlm/benchmark/.
|
||||||
|
|
||||||
Binary format via mmap is supported. Run `./build_binary` to make one then pass the binary file name to the appropriate Model constructor.
|
Binary format via mmap is supported. Run `./build_binary` to make one then pass the binary file name to the appropriate Model constructor.
|
||||||
|
|
||||||
@ -71,7 +71,7 @@ Hideo Okuma and Tomoyuki Yoshimura from NICT contributed ports to ARM and MinGW.
|
|||||||
|
|
||||||
- Select the macros you want, listed in the previous section.
|
- Select the macros you want, listed in the previous section.
|
||||||
|
|
||||||
- There are two build systems: compile.sh and Jamroot+Jamfile. They're pretty simple and are intended to be reimplemented in your build system.
|
- There are two build systems: compile.sh and cmake. They're pretty simple and are intended to be reimplemented in your build system.
|
||||||
|
|
||||||
- Use either the interface in `lm/model.hh` or `lm/virtual_interface.hh`. Interface documentation is in comments of `lm/virtual_interface.hh` and `lm/model.hh`.
|
- Use either the interface in `lm/model.hh` or `lm/virtual_interface.hh`. Interface documentation is in comments of `lm/virtual_interface.hh` and `lm/model.hh`.
|
||||||
|
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
KenLM source downloaded from http://kheafield.com/code/kenlm.tar.gz on 2017/08/05
|
KenLM source downloaded from https://github.com/kpu/kenlm on 2020/01/15
|
||||||
sha256 c4c9f587048470c9a6a592914f0609a71fbb959f0a4cad371e8c355ce81f7c6b
|
commit b9f35777d112ce2fc10bd3986302517a16dc3883
|
||||||
|
|
||||||
This corresponds to https://github.com/kpu/kenlm/commit/cdd794598ea15dc23a7daaf7a8cf89423c97f7e6
|
This corresponds to https://github.com/kpu/kenlm/commit/b9f35777d112ce2fc10bd3986302517a16dc3883
|
||||||
|
|
||||||
The following procedure was run to remove unneeded files:
|
The following procedure was run to remove unneeded files:
|
||||||
|
|
||||||
@ -10,19 +10,3 @@ rm -rf windows include lm/filter lm/builder util/stream util/getopt.* python
|
|||||||
|
|
||||||
This was done in order to ensure uniqueness of double_conversion:
|
This was done in order to ensure uniqueness of double_conversion:
|
||||||
git grep 'double_conversion' | cut -d':' -f1 | sort | uniq | xargs sed -ri 's/double_conversion/kenlm_double_conversion/g'
|
git grep 'double_conversion' | cut -d':' -f1 | sort | uniq | xargs sed -ri 's/double_conversion/kenlm_double_conversion/g'
|
||||||
|
|
||||||
Please apply this patch to be able to build on Android:
|
|
||||||
diff --git a/native_client/kenlm/util/file.cc b/native_client/kenlm/util/file.cc
|
|
||||||
index d53dc0a..b5e36b2 100644
|
|
||||||
--- a/native_client/kenlm/util/file.cc
|
|
||||||
+++ b/native_client/kenlm/util/file.cc
|
|
||||||
@@ -540,7 +540,7 @@ std::string DefaultTempDirectory() {
|
|
||||||
const char *const vars[] = {"TMPDIR", "TMP", "TEMPDIR", "TEMP", 0};
|
|
||||||
for (int i=0; vars[i]; ++i) {
|
|
||||||
char *val =
|
|
||||||
-#if defined(_GNU_SOURCE)
|
|
||||||
+#if defined(_GNU_SOURCE) && defined(__GLIBC_PREREQ)
|
|
||||||
#if __GLIBC_PREREQ(2,17)
|
|
||||||
secure_getenv
|
|
||||||
#else // __GLIBC_PREREQ
|
|
||||||
|
|
||||||
|
@ -10,7 +10,6 @@
|
|||||||
#include <iomanip>
|
#include <iomanip>
|
||||||
#include <limits>
|
#include <limits>
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include <cstdlib>
|
|
||||||
|
|
||||||
#ifdef WIN32
|
#ifdef WIN32
|
||||||
#include "util/getopt.hh"
|
#include "util/getopt.hh"
|
||||||
@ -23,11 +22,12 @@ namespace ngram {
|
|||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
void Usage(const char *name, const char *default_mem) {
|
void Usage(const char *name, const char *default_mem) {
|
||||||
std::cerr << "Usage: " << name << " [-u log10_unknown_probability] [-s] [-i] [-w mmap|after] [-p probing_multiplier] [-T trie_temporary] [-S trie_building_mem] [-q bits] [-b bits] [-a bits] [type] input.arpa [output.mmap]\n\n"
|
std::cerr << "Usage: " << name << " [-u log10_unknown_probability] [-s] [-i] [-v] [-w mmap|after] [-p probing_multiplier] [-T trie_temporary] [-S trie_building_mem] [-q bits] [-b bits] [-a bits] [type] input.arpa [output.mmap]\n\n"
|
||||||
"-u sets the log10 probability for <unk> if the ARPA file does not have one.\n"
|
"-u sets the log10 probability for <unk> if the ARPA file does not have one.\n"
|
||||||
" Default is -100. The ARPA file will always take precedence.\n"
|
" Default is -100. The ARPA file will always take precedence.\n"
|
||||||
"-s allows models to be built even if they do not have <s> and </s>.\n"
|
"-s allows models to be built even if they do not have <s> and </s>.\n"
|
||||||
"-i allows buggy models from IRSTLM by mapping positive log probability to 0.\n"
|
"-i allows buggy models from IRSTLM by mapping positive log probability to 0.\n"
|
||||||
|
"-v disables inclusion of the vocabulary in the binary file.\n"
|
||||||
"-w mmap|after determines how writing is done.\n"
|
"-w mmap|after determines how writing is done.\n"
|
||||||
" mmap maps the binary file and writes to it. Default for trie.\n"
|
" mmap maps the binary file and writes to it. Default for trie.\n"
|
||||||
" after allocates anonymous memory, builds, and writes. Default for probing.\n"
|
" after allocates anonymous memory, builds, and writes. Default for probing.\n"
|
||||||
@ -112,7 +112,7 @@ int main(int argc, char *argv[]) {
|
|||||||
lm::ngram::Config config;
|
lm::ngram::Config config;
|
||||||
config.building_memory = util::ParseSize(default_mem);
|
config.building_memory = util::ParseSize(default_mem);
|
||||||
int opt;
|
int opt;
|
||||||
while ((opt = getopt(argc, argv, "q:b:a:u:p:t:T:m:S:w:sir:h")) != -1) {
|
while ((opt = getopt(argc, argv, "q:b:a:u:p:t:T:m:S:w:sir:vh")) != -1) {
|
||||||
switch(opt) {
|
switch(opt) {
|
||||||
case 'q':
|
case 'q':
|
||||||
config.prob_bits = ParseBitCount(optarg);
|
config.prob_bits = ParseBitCount(optarg);
|
||||||
@ -165,6 +165,9 @@ int main(int argc, char *argv[]) {
|
|||||||
ParseFileList(optarg, config.rest_lower_files);
|
ParseFileList(optarg, config.rest_lower_files);
|
||||||
config.rest_function = Config::REST_LOWER;
|
config.rest_function = Config::REST_LOWER;
|
||||||
break;
|
break;
|
||||||
|
case 'v':
|
||||||
|
config.include_vocab = false;
|
||||||
|
break;
|
||||||
case 'h': // help
|
case 'h': // help
|
||||||
default:
|
default:
|
||||||
Usage(argv[0], default_mem);
|
Usage(argv[0], default_mem);
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
* sizeof(float*) + (kMaxOrder - 1) * sizeof(float) + malloc overhead
|
* sizeof(float*) + (kMaxOrder - 1) * sizeof(float) + malloc overhead
|
||||||
*/
|
*/
|
||||||
#ifndef KENLM_ORDER_MESSAGE
|
#ifndef KENLM_ORDER_MESSAGE
|
||||||
#define KENLM_ORDER_MESSAGE "If your build system supports changing KENLM_MAX_ORDER, change it there and recompile. In the KenLM tarball or Moses, use e.g. `bjam --max-kenlm-order=6 -a'. Otherwise, edit lm/max_order.hh."
|
#define KENLM_ORDER_MESSAGE "If your build system supports changing KENLM_MAX_ORDER, change it there and recompile. With cmake:\n cmake -DKENLM_MAX_ORDER=10 ..\nWith Moses:\n bjam --max-kenlm-order=10 -a\nOtherwise, edit lm/max_order.hh."
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif // LM_MAX_ORDER_H
|
#endif // LM_MAX_ORDER_H
|
||||||
|
@ -19,8 +19,8 @@ void Usage(const char *name) {
|
|||||||
"Each word in the output is formatted as:\n"
|
"Each word in the output is formatted as:\n"
|
||||||
" word=vocab_id ngram_length log10(p(word|context))\n"
|
" word=vocab_id ngram_length log10(p(word|context))\n"
|
||||||
"where ngram_length is the length of n-gram matched. A vocab_id of 0 indicates\n"
|
"where ngram_length is the length of n-gram matched. A vocab_id of 0 indicates\n"
|
||||||
"indicates the unknown word. Sentence-level output includes log10 probability of\n"
|
"the unknown word. Sentence-level output includes log10 probability of the\n"
|
||||||
"the sentence and OOV count.\n";
|
"sentence and OOV count.\n";
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -19,8 +19,8 @@
|
|||||||
|
|
||||||
namespace lm {
|
namespace lm {
|
||||||
|
|
||||||
// 1 for '\t', '\n', and ' '. This is stricter than isspace.
|
// 1 for '\t', '\n', '\r', and ' '. This is stricter than isspace. Apparently ARPA allows vertical tab inside a word.
|
||||||
const bool kARPASpaces[256] = {0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
|
const bool kARPASpaces[256] = {0,0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
@ -85,6 +85,11 @@ void ReadNGramHeader(util::FilePiece &in, unsigned int length) {
|
|||||||
if (line != expected.str()) UTIL_THROW(FormatLoadException, "Was expecting n-gram header " << expected.str() << " but got " << line << " instead");
|
if (line != expected.str()) UTIL_THROW(FormatLoadException, "Was expecting n-gram header " << expected.str() << " but got " << line << " instead");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void ConsumeNewline(util::FilePiece &in) {
|
||||||
|
char follow = in.get();
|
||||||
|
UTIL_THROW_IF('\n' != follow, FormatLoadException, "Expected newline got '" << follow << "'");
|
||||||
|
}
|
||||||
|
|
||||||
void ReadBackoff(util::FilePiece &in, Prob &/*weights*/) {
|
void ReadBackoff(util::FilePiece &in, Prob &/*weights*/) {
|
||||||
switch (in.get()) {
|
switch (in.get()) {
|
||||||
case '\t':
|
case '\t':
|
||||||
@ -94,6 +99,9 @@ void ReadBackoff(util::FilePiece &in, Prob &/*weights*/) {
|
|||||||
UTIL_THROW(FormatLoadException, "Non-zero backoff " << got << " provided for an n-gram that should have no backoff");
|
UTIL_THROW(FormatLoadException, "Non-zero backoff " << got << " provided for an n-gram that should have no backoff");
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
case '\r':
|
||||||
|
ConsumeNewline(in);
|
||||||
|
// Intentionally no break.
|
||||||
case '\n':
|
case '\n':
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
@ -120,8 +128,18 @@ void ReadBackoff(util::FilePiece &in, float &backoff) {
|
|||||||
UTIL_THROW_IF(float_class == FP_NAN || float_class == FP_INFINITE, FormatLoadException, "Bad backoff " << backoff);
|
UTIL_THROW_IF(float_class == FP_NAN || float_class == FP_INFINITE, FormatLoadException, "Bad backoff " << backoff);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
UTIL_THROW_IF(in.get() != '\n', FormatLoadException, "Expected newline after backoff");
|
switch (char got = in.get()) {
|
||||||
|
case '\r':
|
||||||
|
ConsumeNewline(in);
|
||||||
|
case '\n':
|
||||||
break;
|
break;
|
||||||
|
default:
|
||||||
|
UTIL_THROW(FormatLoadException, "Expected newline after backoffs, got " << got);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case '\r':
|
||||||
|
ConsumeNewline(in);
|
||||||
|
// Intentionally no break.
|
||||||
case '\n':
|
case '\n':
|
||||||
backoff = ngram::kNoExtensionBackoff;
|
backoff = ngram::kNoExtensionBackoff;
|
||||||
break;
|
break;
|
||||||
|
@ -282,7 +282,7 @@ void ProbingVocabulary::LoadedBinary(bool have_words, int fd, EnumerateVocab *to
|
|||||||
if (have_words) ReadWords(fd, to, bound_, offset);
|
if (have_words) ReadWords(fd, to, bound_, offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
void MissingUnknown(const Config &config) throw(SpecialWordMissingException) {
|
void MissingUnknown(const Config &config) {
|
||||||
switch(config.unknown_missing) {
|
switch(config.unknown_missing) {
|
||||||
case SILENT:
|
case SILENT:
|
||||||
return;
|
return;
|
||||||
@ -294,7 +294,7 @@ void MissingUnknown(const Config &config) throw(SpecialWordMissingException) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void MissingSentenceMarker(const Config &config, const char *str) throw(SpecialWordMissingException) {
|
void MissingSentenceMarker(const Config &config, const char *str) {
|
||||||
switch (config.sentence_marker_missing) {
|
switch (config.sentence_marker_missing) {
|
||||||
case SILENT:
|
case SILENT:
|
||||||
return;
|
return;
|
||||||
|
@ -207,10 +207,10 @@ class ProbingVocabulary : public base::Vocabulary {
|
|||||||
detail::ProbingVocabularyHeader *header_;
|
detail::ProbingVocabularyHeader *header_;
|
||||||
};
|
};
|
||||||
|
|
||||||
void MissingUnknown(const Config &config) throw(SpecialWordMissingException);
|
void MissingUnknown(const Config &config);
|
||||||
void MissingSentenceMarker(const Config &config, const char *str) throw(SpecialWordMissingException);
|
void MissingSentenceMarker(const Config &config, const char *str);
|
||||||
|
|
||||||
template <class Vocab> void CheckSpecials(const Config &config, const Vocab &vocab) throw(SpecialWordMissingException) {
|
template <class Vocab> void CheckSpecials(const Config &config, const Vocab &vocab) {
|
||||||
if (!vocab.SawUnk()) MissingUnknown(config);
|
if (!vocab.SawUnk()) MissingUnknown(config);
|
||||||
if (vocab.BeginSentence() == vocab.NotFound()) MissingSentenceMarker(config, "<s>");
|
if (vocab.BeginSentence() == vocab.NotFound()) MissingSentenceMarker(config, "<s>");
|
||||||
if (vocab.EndSentence() == vocab.NotFound()) MissingSentenceMarker(config, "</s>");
|
if (vocab.EndSentence() == vocab.NotFound()) MissingSentenceMarker(config, "</s>");
|
||||||
|
@ -2,6 +2,8 @@ from setuptools import setup, Extension
|
|||||||
import glob
|
import glob
|
||||||
import platform
|
import platform
|
||||||
import os
|
import os
|
||||||
|
import sys
|
||||||
|
import re
|
||||||
|
|
||||||
#Does gcc compile with this header and library?
|
#Does gcc compile with this header and library?
|
||||||
def compile_test(header, library):
|
def compile_test(header, library):
|
||||||
@ -9,16 +11,28 @@ def compile_test(header, library):
|
|||||||
command = "bash -c \"g++ -include " + header + " -l" + library + " -x c++ - <<<'int main() {}' -o " + dummy_path + " >/dev/null 2>/dev/null && rm " + dummy_path + " 2>/dev/null\""
|
command = "bash -c \"g++ -include " + header + " -l" + library + " -x c++ - <<<'int main() {}' -o " + dummy_path + " >/dev/null 2>/dev/null && rm " + dummy_path + " 2>/dev/null\""
|
||||||
return os.system(command) == 0
|
return os.system(command) == 0
|
||||||
|
|
||||||
|
max_order = "6"
|
||||||
|
is_max_order = [s for s in sys.argv if "--max_order" in s]
|
||||||
|
for element in is_max_order:
|
||||||
|
max_order = re.split('[= ]',element)[1]
|
||||||
|
sys.argv.remove(element)
|
||||||
|
|
||||||
FILES = glob.glob('util/*.cc') + glob.glob('lm/*.cc') + glob.glob('util/double-conversion/*.cc')
|
FILES = glob.glob('util/*.cc') + glob.glob('lm/*.cc') + glob.glob('util/double-conversion/*.cc') + glob.glob('python/*.cc')
|
||||||
FILES = [fn for fn in FILES if not (fn.endswith('main.cc') or fn.endswith('test.cc'))]
|
FILES = [fn for fn in FILES if not (fn.endswith('main.cc') or fn.endswith('test.cc'))]
|
||||||
|
|
||||||
LIBS = ['stdc++']
|
if platform.system() == 'Linux':
|
||||||
if platform.system() != 'Darwin':
|
LIBS = ['stdc++', 'rt']
|
||||||
LIBS.append('rt')
|
elif platform.system() == 'Darwin':
|
||||||
|
LIBS = ['c++']
|
||||||
|
else:
|
||||||
|
LIBS = []
|
||||||
|
|
||||||
#We don't need -std=c++11 but python seems to be compiled with it now. https://github.com/kpu/kenlm/issues/86
|
#We don't need -std=c++11 but python seems to be compiled with it now. https://github.com/kpu/kenlm/issues/86
|
||||||
ARGS = ['-O3', '-DNDEBUG', '-DKENLM_MAX_ORDER=6', '-std=c++11']
|
ARGS = ['-O3', '-DNDEBUG', '-DKENLM_MAX_ORDER='+max_order, '-std=c++11']
|
||||||
|
|
||||||
|
#Attempted fix to https://github.com/kpu/kenlm/issues/186 and https://github.com/kpu/kenlm/issues/197
|
||||||
|
if platform.system() == 'Darwin':
|
||||||
|
ARGS += ["-stdlib=libc++", "-mmacosx-version-min=10.7"]
|
||||||
|
|
||||||
if compile_test('zlib.h', 'z'):
|
if compile_test('zlib.h', 'z'):
|
||||||
ARGS.append('-DHAVE_ZLIB')
|
ARGS.append('-DHAVE_ZLIB')
|
||||||
|
@ -108,7 +108,7 @@ typedef union { float f; uint32_t i; } FloatEnc;
|
|||||||
|
|
||||||
inline float ReadFloat32(const void *base, uint64_t bit_off) {
|
inline float ReadFloat32(const void *base, uint64_t bit_off) {
|
||||||
FloatEnc encoded;
|
FloatEnc encoded;
|
||||||
encoded.i = ReadOff(base, bit_off) >> BitPackShift(bit_off & 7, 32);
|
encoded.i = static_cast<uint32_t>(ReadOff(base, bit_off) >> BitPackShift(bit_off & 7, 32));
|
||||||
return encoded.f;
|
return encoded.f;
|
||||||
}
|
}
|
||||||
inline void WriteFloat32(void *base, uint64_t bit_off, float value) {
|
inline void WriteFloat32(void *base, uint64_t bit_off, float value) {
|
||||||
@ -135,7 +135,7 @@ inline void UnsetSign(float &to) {
|
|||||||
|
|
||||||
inline float ReadNonPositiveFloat31(const void *base, uint64_t bit_off) {
|
inline float ReadNonPositiveFloat31(const void *base, uint64_t bit_off) {
|
||||||
FloatEnc encoded;
|
FloatEnc encoded;
|
||||||
encoded.i = ReadOff(base, bit_off) >> BitPackShift(bit_off & 7, 31);
|
encoded.i = static_cast<uint32_t>(ReadOff(base, bit_off) >> BitPackShift(bit_off & 7, 31));
|
||||||
// Sign bit set means negative.
|
// Sign bit set means negative.
|
||||||
encoded.i |= kSignBit;
|
encoded.i |= kSignBit;
|
||||||
return encoded.f;
|
return encoded.f;
|
||||||
|
@ -25,7 +25,7 @@
|
|||||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#include <cmath>
|
#include <math.h>
|
||||||
|
|
||||||
#include "bignum-dtoa.h"
|
#include "bignum-dtoa.h"
|
||||||
|
|
||||||
@ -192,13 +192,13 @@ static void GenerateShortestDigits(Bignum* numerator, Bignum* denominator,
|
|||||||
delta_plus = delta_minus;
|
delta_plus = delta_minus;
|
||||||
}
|
}
|
||||||
*length = 0;
|
*length = 0;
|
||||||
while (true) {
|
for (;;) {
|
||||||
uint16_t digit;
|
uint16_t digit;
|
||||||
digit = numerator->DivideModuloIntBignum(*denominator);
|
digit = numerator->DivideModuloIntBignum(*denominator);
|
||||||
ASSERT(digit <= 9); // digit is a uint16_t and therefore always positive.
|
ASSERT(digit <= 9); // digit is a uint16_t and therefore always positive.
|
||||||
// digit = numerator / denominator (integer division).
|
// digit = numerator / denominator (integer division).
|
||||||
// numerator = numerator % denominator.
|
// numerator = numerator % denominator.
|
||||||
buffer[(*length)++] = digit + '0';
|
buffer[(*length)++] = static_cast<char>(digit + '0');
|
||||||
|
|
||||||
// Can we stop already?
|
// Can we stop already?
|
||||||
// If the remainder of the division is less than the distance to the lower
|
// If the remainder of the division is less than the distance to the lower
|
||||||
@ -282,7 +282,7 @@ static void GenerateShortestDigits(Bignum* numerator, Bignum* denominator,
|
|||||||
// exponent (decimal_point), when rounding upwards.
|
// exponent (decimal_point), when rounding upwards.
|
||||||
static void GenerateCountedDigits(int count, int* decimal_point,
|
static void GenerateCountedDigits(int count, int* decimal_point,
|
||||||
Bignum* numerator, Bignum* denominator,
|
Bignum* numerator, Bignum* denominator,
|
||||||
Vector<char>(buffer), int* length) {
|
Vector<char> buffer, int* length) {
|
||||||
ASSERT(count >= 0);
|
ASSERT(count >= 0);
|
||||||
for (int i = 0; i < count - 1; ++i) {
|
for (int i = 0; i < count - 1; ++i) {
|
||||||
uint16_t digit;
|
uint16_t digit;
|
||||||
@ -290,7 +290,7 @@ static void GenerateCountedDigits(int count, int* decimal_point,
|
|||||||
ASSERT(digit <= 9); // digit is a uint16_t and therefore always positive.
|
ASSERT(digit <= 9); // digit is a uint16_t and therefore always positive.
|
||||||
// digit = numerator / denominator (integer division).
|
// digit = numerator / denominator (integer division).
|
||||||
// numerator = numerator % denominator.
|
// numerator = numerator % denominator.
|
||||||
buffer[i] = digit + '0';
|
buffer[i] = static_cast<char>(digit + '0');
|
||||||
// Prepare for next iteration.
|
// Prepare for next iteration.
|
||||||
numerator->Times10();
|
numerator->Times10();
|
||||||
}
|
}
|
||||||
@ -300,7 +300,8 @@ static void GenerateCountedDigits(int count, int* decimal_point,
|
|||||||
if (Bignum::PlusCompare(*numerator, *numerator, *denominator) >= 0) {
|
if (Bignum::PlusCompare(*numerator, *numerator, *denominator) >= 0) {
|
||||||
digit++;
|
digit++;
|
||||||
}
|
}
|
||||||
buffer[count - 1] = digit + '0';
|
ASSERT(digit <= 10);
|
||||||
|
buffer[count - 1] = static_cast<char>(digit + '0');
|
||||||
// Correct bad digits (in case we had a sequence of '9's). Propagate the
|
// Correct bad digits (in case we had a sequence of '9's). Propagate the
|
||||||
// carry until we hit a non-'9' or til we reach the first digit.
|
// carry until we hit a non-'9' or til we reach the first digit.
|
||||||
for (int i = count - 1; i > 0; --i) {
|
for (int i = count - 1; i > 0; --i) {
|
||||||
|
@ -40,6 +40,7 @@ Bignum::Bignum()
|
|||||||
|
|
||||||
template<typename S>
|
template<typename S>
|
||||||
static int BitSize(S value) {
|
static int BitSize(S value) {
|
||||||
|
(void) value; // Mark variable as used.
|
||||||
return 8 * sizeof(value);
|
return 8 * sizeof(value);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -103,7 +104,7 @@ void Bignum::AssignDecimalString(Vector<const char> value) {
|
|||||||
const int kMaxUint64DecimalDigits = 19;
|
const int kMaxUint64DecimalDigits = 19;
|
||||||
Zero();
|
Zero();
|
||||||
int length = value.length();
|
int length = value.length();
|
||||||
int pos = 0;
|
unsigned int pos = 0;
|
||||||
// Let's just say that each digit needs 4 bits.
|
// Let's just say that each digit needs 4 bits.
|
||||||
while (length >= kMaxUint64DecimalDigits) {
|
while (length >= kMaxUint64DecimalDigits) {
|
||||||
uint64_t digits = ReadUInt64(value, pos, kMaxUint64DecimalDigits);
|
uint64_t digits = ReadUInt64(value, pos, kMaxUint64DecimalDigits);
|
||||||
@ -122,9 +123,8 @@ void Bignum::AssignDecimalString(Vector<const char> value) {
|
|||||||
static int HexCharValue(char c) {
|
static int HexCharValue(char c) {
|
||||||
if ('0' <= c && c <= '9') return c - '0';
|
if ('0' <= c && c <= '9') return c - '0';
|
||||||
if ('a' <= c && c <= 'f') return 10 + c - 'a';
|
if ('a' <= c && c <= 'f') return 10 + c - 'a';
|
||||||
if ('A' <= c && c <= 'F') return 10 + c - 'A';
|
ASSERT('A' <= c && c <= 'F');
|
||||||
UNREACHABLE();
|
return 10 + c - 'A';
|
||||||
return 0; // To make compiler happy.
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -501,13 +501,14 @@ uint16_t Bignum::DivideModuloIntBignum(const Bignum& other) {
|
|||||||
// Start by removing multiples of 'other' until both numbers have the same
|
// Start by removing multiples of 'other' until both numbers have the same
|
||||||
// number of digits.
|
// number of digits.
|
||||||
while (BigitLength() > other.BigitLength()) {
|
while (BigitLength() > other.BigitLength()) {
|
||||||
// This naive approach is extremely inefficient if the this divided other
|
// This naive approach is extremely inefficient if `this` divided by other
|
||||||
// might be big. This function is implemented for doubleToString where
|
// is big. This function is implemented for doubleToString where
|
||||||
// the result should be small (less than 10).
|
// the result should be small (less than 10).
|
||||||
ASSERT(other.bigits_[other.used_digits_ - 1] >= ((1 << kBigitSize) / 16));
|
ASSERT(other.bigits_[other.used_digits_ - 1] >= ((1 << kBigitSize) / 16));
|
||||||
|
ASSERT(bigits_[used_digits_ - 1] < 0x10000);
|
||||||
// Remove the multiples of the first digit.
|
// Remove the multiples of the first digit.
|
||||||
// Example this = 23 and other equals 9. -> Remove 2 multiples.
|
// Example this = 23 and other equals 9. -> Remove 2 multiples.
|
||||||
result += bigits_[used_digits_ - 1];
|
result += static_cast<uint16_t>(bigits_[used_digits_ - 1]);
|
||||||
SubtractTimes(other, bigits_[used_digits_ - 1]);
|
SubtractTimes(other, bigits_[used_digits_ - 1]);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -523,13 +524,15 @@ uint16_t Bignum::DivideModuloIntBignum(const Bignum& other) {
|
|||||||
// Shortcut for easy (and common) case.
|
// Shortcut for easy (and common) case.
|
||||||
int quotient = this_bigit / other_bigit;
|
int quotient = this_bigit / other_bigit;
|
||||||
bigits_[used_digits_ - 1] = this_bigit - other_bigit * quotient;
|
bigits_[used_digits_ - 1] = this_bigit - other_bigit * quotient;
|
||||||
result += quotient;
|
ASSERT(quotient < 0x10000);
|
||||||
|
result += static_cast<uint16_t>(quotient);
|
||||||
Clamp();
|
Clamp();
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
int division_estimate = this_bigit / (other_bigit + 1);
|
int division_estimate = this_bigit / (other_bigit + 1);
|
||||||
result += division_estimate;
|
ASSERT(division_estimate < 0x10000);
|
||||||
|
result += static_cast<uint16_t>(division_estimate);
|
||||||
SubtractTimes(other, division_estimate);
|
SubtractTimes(other, division_estimate);
|
||||||
|
|
||||||
if (other_bigit * (division_estimate + 1) > this_bigit) {
|
if (other_bigit * (division_estimate + 1) > this_bigit) {
|
||||||
@ -560,8 +563,8 @@ static int SizeInHexChars(S number) {
|
|||||||
|
|
||||||
static char HexCharOfValue(int value) {
|
static char HexCharOfValue(int value) {
|
||||||
ASSERT(0 <= value && value <= 16);
|
ASSERT(0 <= value && value <= 16);
|
||||||
if (value < 10) return value + '0';
|
if (value < 10) return static_cast<char>(value + '0');
|
||||||
return value - 10 + 'A';
|
return static_cast<char>(value - 10 + 'A');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -755,7 +758,6 @@ void Bignum::SubtractTimes(const Bignum& other, int factor) {
|
|||||||
Chunk difference = bigits_[i] - borrow;
|
Chunk difference = bigits_[i] - borrow;
|
||||||
bigits_[i] = difference & kBigitMask;
|
bigits_[i] = difference & kBigitMask;
|
||||||
borrow = difference >> (kChunkSize - 1);
|
borrow = difference >> (kChunkSize - 1);
|
||||||
++i;
|
|
||||||
}
|
}
|
||||||
Clamp();
|
Clamp();
|
||||||
}
|
}
|
||||||
|
@ -49,7 +49,6 @@ class Bignum {
|
|||||||
|
|
||||||
void AssignPowerUInt16(uint16_t base, int exponent);
|
void AssignPowerUInt16(uint16_t base, int exponent);
|
||||||
|
|
||||||
void AddUInt16(uint16_t operand);
|
|
||||||
void AddUInt64(uint64_t operand);
|
void AddUInt64(uint64_t operand);
|
||||||
void AddBignum(const Bignum& other);
|
void AddBignum(const Bignum& other);
|
||||||
// Precondition: this >= other.
|
// Precondition: this >= other.
|
||||||
|
@ -25,9 +25,9 @@
|
|||||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#include <cstdarg>
|
#include <stdarg.h>
|
||||||
#include <climits>
|
#include <limits.h>
|
||||||
#include <cmath>
|
#include <math.h>
|
||||||
|
|
||||||
#include "utils.h"
|
#include "utils.h"
|
||||||
|
|
||||||
@ -131,7 +131,6 @@ static const CachedPower kCachedPowers[] = {
|
|||||||
{UINT64_2PART_C(0xaf87023b, 9bf0ee6b), 1066, 340},
|
{UINT64_2PART_C(0xaf87023b, 9bf0ee6b), 1066, 340},
|
||||||
};
|
};
|
||||||
|
|
||||||
static const int kCachedPowersLength = ARRAY_SIZE(kCachedPowers);
|
|
||||||
static const int kCachedPowersOffset = 348; // -1 * the first decimal_exponent.
|
static const int kCachedPowersOffset = 348; // -1 * the first decimal_exponent.
|
||||||
static const double kD_1_LOG2_10 = 0.30102999566398114; // 1 / lg(10)
|
static const double kD_1_LOG2_10 = 0.30102999566398114; // 1 / lg(10)
|
||||||
// Difference between the decimal exponents in the table above.
|
// Difference between the decimal exponents in the table above.
|
||||||
@ -149,9 +148,10 @@ void PowersOfTenCache::GetCachedPowerForBinaryExponentRange(
|
|||||||
int foo = kCachedPowersOffset;
|
int foo = kCachedPowersOffset;
|
||||||
int index =
|
int index =
|
||||||
(foo + static_cast<int>(k) - 1) / kDecimalExponentDistance + 1;
|
(foo + static_cast<int>(k) - 1) / kDecimalExponentDistance + 1;
|
||||||
ASSERT(0 <= index && index < kCachedPowersLength);
|
ASSERT(0 <= index && index < static_cast<int>(ARRAY_SIZE(kCachedPowers)));
|
||||||
CachedPower cached_power = kCachedPowers[index];
|
CachedPower cached_power = kCachedPowers[index];
|
||||||
ASSERT(min_exponent <= cached_power.binary_exponent);
|
ASSERT(min_exponent <= cached_power.binary_exponent);
|
||||||
|
(void) max_exponent; // Mark variable as used.
|
||||||
ASSERT(cached_power.binary_exponent <= max_exponent);
|
ASSERT(cached_power.binary_exponent <= max_exponent);
|
||||||
*decimal_exponent = cached_power.decimal_exponent;
|
*decimal_exponent = cached_power.decimal_exponent;
|
||||||
*power = DiyFp(cached_power.significand, cached_power.binary_exponent);
|
*power = DiyFp(cached_power.significand, cached_power.binary_exponent);
|
||||||
|
@ -42,7 +42,7 @@ class DiyFp {
|
|||||||
static const int kSignificandSize = 64;
|
static const int kSignificandSize = 64;
|
||||||
|
|
||||||
DiyFp() : f_(0), e_(0) {}
|
DiyFp() : f_(0), e_(0) {}
|
||||||
DiyFp(uint64_t f, int e) : f_(f), e_(e) {}
|
DiyFp(uint64_t significand, int exponent) : f_(significand), e_(exponent) {}
|
||||||
|
|
||||||
// this = this - other.
|
// this = this - other.
|
||||||
// The exponents of both numbers must be the same and the significand of this
|
// The exponents of both numbers must be the same and the significand of this
|
||||||
@ -76,22 +76,22 @@ class DiyFp {
|
|||||||
|
|
||||||
void Normalize() {
|
void Normalize() {
|
||||||
ASSERT(f_ != 0);
|
ASSERT(f_ != 0);
|
||||||
uint64_t f = f_;
|
uint64_t significand = f_;
|
||||||
int e = e_;
|
int exponent = e_;
|
||||||
|
|
||||||
// This method is mainly called for normalizing boundaries. In general
|
// This method is mainly called for normalizing boundaries. In general
|
||||||
// boundaries need to be shifted by 10 bits. We thus optimize for this case.
|
// boundaries need to be shifted by 10 bits. We thus optimize for this case.
|
||||||
const uint64_t k10MSBits = UINT64_2PART_C(0xFFC00000, 00000000);
|
const uint64_t k10MSBits = UINT64_2PART_C(0xFFC00000, 00000000);
|
||||||
while ((f & k10MSBits) == 0) {
|
while ((significand & k10MSBits) == 0) {
|
||||||
f <<= 10;
|
significand <<= 10;
|
||||||
e -= 10;
|
exponent -= 10;
|
||||||
}
|
}
|
||||||
while ((f & kUint64MSB) == 0) {
|
while ((significand & kUint64MSB) == 0) {
|
||||||
f <<= 1;
|
significand <<= 1;
|
||||||
e--;
|
exponent--;
|
||||||
}
|
}
|
||||||
f_ = f;
|
f_ = significand;
|
||||||
e_ = e;
|
e_ = exponent;
|
||||||
}
|
}
|
||||||
|
|
||||||
static DiyFp Normalize(const DiyFp& a) {
|
static DiyFp Normalize(const DiyFp& a) {
|
||||||
|
@ -25,8 +25,8 @@
|
|||||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#include <climits>
|
#include <limits.h>
|
||||||
#include <cmath>
|
#include <math.h>
|
||||||
|
|
||||||
#include "double-conversion.h"
|
#include "double-conversion.h"
|
||||||
|
|
||||||
@ -118,7 +118,7 @@ void DoubleToStringConverter::CreateDecimalRepresentation(
|
|||||||
StringBuilder* result_builder) const {
|
StringBuilder* result_builder) const {
|
||||||
// Create a representation that is padded with zeros if needed.
|
// Create a representation that is padded with zeros if needed.
|
||||||
if (decimal_point <= 0) {
|
if (decimal_point <= 0) {
|
||||||
// "0.00000decimal_rep".
|
// "0.00000decimal_rep" or "0.000decimal_rep00".
|
||||||
result_builder->AddCharacter('0');
|
result_builder->AddCharacter('0');
|
||||||
if (digits_after_point > 0) {
|
if (digits_after_point > 0) {
|
||||||
result_builder->AddCharacter('.');
|
result_builder->AddCharacter('.');
|
||||||
@ -129,7 +129,7 @@ void DoubleToStringConverter::CreateDecimalRepresentation(
|
|||||||
result_builder->AddPadding('0', remaining_digits);
|
result_builder->AddPadding('0', remaining_digits);
|
||||||
}
|
}
|
||||||
} else if (decimal_point >= length) {
|
} else if (decimal_point >= length) {
|
||||||
// "decimal_rep0000.00000" or "decimal_rep.0000"
|
// "decimal_rep0000.00000" or "decimal_rep.0000".
|
||||||
result_builder->AddSubstring(decimal_digits, length);
|
result_builder->AddSubstring(decimal_digits, length);
|
||||||
result_builder->AddPadding('0', decimal_point - length);
|
result_builder->AddPadding('0', decimal_point - length);
|
||||||
if (digits_after_point > 0) {
|
if (digits_after_point > 0) {
|
||||||
@ -137,7 +137,7 @@ void DoubleToStringConverter::CreateDecimalRepresentation(
|
|||||||
result_builder->AddPadding('0', digits_after_point);
|
result_builder->AddPadding('0', digits_after_point);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// "decima.l_rep000"
|
// "decima.l_rep000".
|
||||||
ASSERT(digits_after_point > 0);
|
ASSERT(digits_after_point > 0);
|
||||||
result_builder->AddSubstring(decimal_digits, decimal_point);
|
result_builder->AddSubstring(decimal_digits, decimal_point);
|
||||||
result_builder->AddCharacter('.');
|
result_builder->AddCharacter('.');
|
||||||
@ -348,7 +348,6 @@ static BignumDtoaMode DtoaToBignumDtoaMode(
|
|||||||
case DoubleToStringConverter::PRECISION: return BIGNUM_DTOA_PRECISION;
|
case DoubleToStringConverter::PRECISION: return BIGNUM_DTOA_PRECISION;
|
||||||
default:
|
default:
|
||||||
UNREACHABLE();
|
UNREACHABLE();
|
||||||
return BIGNUM_DTOA_SHORTEST; // To silence compiler.
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -403,8 +402,8 @@ void DoubleToStringConverter::DoubleToAscii(double v,
|
|||||||
vector, length, point);
|
vector, length, point);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
UNREACHABLE();
|
|
||||||
fast_worked = false;
|
fast_worked = false;
|
||||||
|
UNREACHABLE();
|
||||||
}
|
}
|
||||||
if (fast_worked) return;
|
if (fast_worked) return;
|
||||||
|
|
||||||
@ -417,8 +416,9 @@ void DoubleToStringConverter::DoubleToAscii(double v,
|
|||||||
|
|
||||||
// Consumes the given substring from the iterator.
|
// Consumes the given substring from the iterator.
|
||||||
// Returns false, if the substring does not match.
|
// Returns false, if the substring does not match.
|
||||||
static bool ConsumeSubString(const char** current,
|
template <class Iterator>
|
||||||
const char* end,
|
static bool ConsumeSubString(Iterator* current,
|
||||||
|
Iterator end,
|
||||||
const char* substring) {
|
const char* substring) {
|
||||||
ASSERT(**current == *substring);
|
ASSERT(**current == *substring);
|
||||||
for (substring++; *substring != '\0'; substring++) {
|
for (substring++; *substring != '\0'; substring++) {
|
||||||
@ -440,10 +440,36 @@ static bool ConsumeSubString(const char** current,
|
|||||||
const int kMaxSignificantDigits = 772;
|
const int kMaxSignificantDigits = 772;
|
||||||
|
|
||||||
|
|
||||||
|
static const char kWhitespaceTable7[] = { 32, 13, 10, 9, 11, 12 };
|
||||||
|
static const int kWhitespaceTable7Length = ARRAY_SIZE(kWhitespaceTable7);
|
||||||
|
|
||||||
|
|
||||||
|
static const uc16 kWhitespaceTable16[] = {
|
||||||
|
160, 8232, 8233, 5760, 6158, 8192, 8193, 8194, 8195,
|
||||||
|
8196, 8197, 8198, 8199, 8200, 8201, 8202, 8239, 8287, 12288, 65279
|
||||||
|
};
|
||||||
|
static const int kWhitespaceTable16Length = ARRAY_SIZE(kWhitespaceTable16);
|
||||||
|
|
||||||
|
|
||||||
|
static bool isWhitespace(int x) {
|
||||||
|
if (x < 128) {
|
||||||
|
for (int i = 0; i < kWhitespaceTable7Length; i++) {
|
||||||
|
if (kWhitespaceTable7[i] == x) return true;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (int i = 0; i < kWhitespaceTable16Length; i++) {
|
||||||
|
if (kWhitespaceTable16[i] == x) return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
// Returns true if a nonspace found and false if the end has reached.
|
// Returns true if a nonspace found and false if the end has reached.
|
||||||
static inline bool AdvanceToNonspace(const char** current, const char* end) {
|
template <class Iterator>
|
||||||
|
static inline bool AdvanceToNonspace(Iterator* current, Iterator end) {
|
||||||
while (*current != end) {
|
while (*current != end) {
|
||||||
if (**current != ' ') return true;
|
if (!isWhitespace(**current)) return true;
|
||||||
++*current;
|
++*current;
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
@ -462,26 +488,57 @@ static double SignedZero(bool sign) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Returns true if 'c' is a decimal digit that is valid for the given radix.
|
||||||
|
//
|
||||||
|
// The function is small and could be inlined, but VS2012 emitted a warning
|
||||||
|
// because it constant-propagated the radix and concluded that the last
|
||||||
|
// condition was always true. By moving it into a separate function the
|
||||||
|
// compiler wouldn't warn anymore.
|
||||||
|
#if _MSC_VER
|
||||||
|
#pragma optimize("",off)
|
||||||
|
static bool IsDecimalDigitForRadix(int c, int radix) {
|
||||||
|
return '0' <= c && c <= '9' && (c - '0') < radix;
|
||||||
|
}
|
||||||
|
#pragma optimize("",on)
|
||||||
|
#else
|
||||||
|
static bool inline IsDecimalDigitForRadix(int c, int radix) {
|
||||||
|
return '0' <= c && c <= '9' && (c - '0') < radix;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
// Returns true if 'c' is a character digit that is valid for the given radix.
|
||||||
|
// The 'a_character' should be 'a' or 'A'.
|
||||||
|
//
|
||||||
|
// The function is small and could be inlined, but VS2012 emitted a warning
|
||||||
|
// because it constant-propagated the radix and concluded that the first
|
||||||
|
// condition was always false. By moving it into a separate function the
|
||||||
|
// compiler wouldn't warn anymore.
|
||||||
|
static bool IsCharacterDigitForRadix(int c, int radix, char a_character) {
|
||||||
|
return radix > 10 && c >= a_character && c < a_character + radix - 10;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
// Parsing integers with radix 2, 4, 8, 16, 32. Assumes current != end.
|
// Parsing integers with radix 2, 4, 8, 16, 32. Assumes current != end.
|
||||||
template <int radix_log_2>
|
template <int radix_log_2, class Iterator>
|
||||||
static double RadixStringToIeee(const char* current,
|
static double RadixStringToIeee(Iterator* current,
|
||||||
const char* end,
|
Iterator end,
|
||||||
bool sign,
|
bool sign,
|
||||||
bool allow_trailing_junk,
|
bool allow_trailing_junk,
|
||||||
double junk_string_value,
|
double junk_string_value,
|
||||||
bool read_as_double,
|
bool read_as_double,
|
||||||
const char** trailing_pointer) {
|
bool* result_is_junk) {
|
||||||
ASSERT(current != end);
|
ASSERT(*current != end);
|
||||||
|
|
||||||
const int kDoubleSize = Double::kSignificandSize;
|
const int kDoubleSize = Double::kSignificandSize;
|
||||||
const int kSingleSize = Single::kSignificandSize;
|
const int kSingleSize = Single::kSignificandSize;
|
||||||
const int kSignificandSize = read_as_double? kDoubleSize: kSingleSize;
|
const int kSignificandSize = read_as_double? kDoubleSize: kSingleSize;
|
||||||
|
|
||||||
|
*result_is_junk = true;
|
||||||
|
|
||||||
// Skip leading 0s.
|
// Skip leading 0s.
|
||||||
while (*current == '0') {
|
while (**current == '0') {
|
||||||
++current;
|
++(*current);
|
||||||
if (current == end) {
|
if (*current == end) {
|
||||||
*trailing_pointer = end;
|
*result_is_junk = false;
|
||||||
return SignedZero(sign);
|
return SignedZero(sign);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -492,14 +549,14 @@ static double RadixStringToIeee(const char* current,
|
|||||||
|
|
||||||
do {
|
do {
|
||||||
int digit;
|
int digit;
|
||||||
if (*current >= '0' && *current <= '9' && *current < '0' + radix) {
|
if (IsDecimalDigitForRadix(**current, radix)) {
|
||||||
digit = static_cast<char>(*current) - '0';
|
digit = static_cast<char>(**current) - '0';
|
||||||
} else if (radix > 10 && *current >= 'a' && *current < 'a' + radix - 10) {
|
} else if (IsCharacterDigitForRadix(**current, radix, 'a')) {
|
||||||
digit = static_cast<char>(*current) - 'a' + 10;
|
digit = static_cast<char>(**current) - 'a' + 10;
|
||||||
} else if (radix > 10 && *current >= 'A' && *current < 'A' + radix - 10) {
|
} else if (IsCharacterDigitForRadix(**current, radix, 'A')) {
|
||||||
digit = static_cast<char>(*current) - 'A' + 10;
|
digit = static_cast<char>(**current) - 'A' + 10;
|
||||||
} else {
|
} else {
|
||||||
if (allow_trailing_junk || !AdvanceToNonspace(¤t, end)) {
|
if (allow_trailing_junk || !AdvanceToNonspace(current, end)) {
|
||||||
break;
|
break;
|
||||||
} else {
|
} else {
|
||||||
return junk_string_value;
|
return junk_string_value;
|
||||||
@ -523,14 +580,14 @@ static double RadixStringToIeee(const char* current,
|
|||||||
exponent = overflow_bits_count;
|
exponent = overflow_bits_count;
|
||||||
|
|
||||||
bool zero_tail = true;
|
bool zero_tail = true;
|
||||||
while (true) {
|
for (;;) {
|
||||||
++current;
|
++(*current);
|
||||||
if (current == end || !isDigit(*current, radix)) break;
|
if (*current == end || !isDigit(**current, radix)) break;
|
||||||
zero_tail = zero_tail && *current == '0';
|
zero_tail = zero_tail && **current == '0';
|
||||||
exponent += radix_log_2;
|
exponent += radix_log_2;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!allow_trailing_junk && AdvanceToNonspace(¤t, end)) {
|
if (!allow_trailing_junk && AdvanceToNonspace(current, end)) {
|
||||||
return junk_string_value;
|
return junk_string_value;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -552,13 +609,13 @@ static double RadixStringToIeee(const char* current,
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
++current;
|
++(*current);
|
||||||
} while (current != end);
|
} while (*current != end);
|
||||||
|
|
||||||
ASSERT(number < ((int64_t)1 << kSignificandSize));
|
ASSERT(number < ((int64_t)1 << kSignificandSize));
|
||||||
ASSERT(static_cast<int64_t>(static_cast<double>(number)) == number);
|
ASSERT(static_cast<int64_t>(static_cast<double>(number)) == number);
|
||||||
|
|
||||||
*trailing_pointer = current;
|
*result_is_junk = false;
|
||||||
|
|
||||||
if (exponent == 0) {
|
if (exponent == 0) {
|
||||||
if (sign) {
|
if (sign) {
|
||||||
@ -573,13 +630,14 @@ static double RadixStringToIeee(const char* current,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template <class Iterator>
|
||||||
double StringToDoubleConverter::StringToIeee(
|
double StringToDoubleConverter::StringToIeee(
|
||||||
const char* input,
|
Iterator input,
|
||||||
int length,
|
int length,
|
||||||
int* processed_characters_count,
|
bool read_as_double,
|
||||||
bool read_as_double) const {
|
int* processed_characters_count) const {
|
||||||
const char* current = input;
|
Iterator current = input;
|
||||||
const char* end = input + length;
|
Iterator end = input + length;
|
||||||
|
|
||||||
*processed_characters_count = 0;
|
*processed_characters_count = 0;
|
||||||
|
|
||||||
@ -600,7 +658,7 @@ double StringToDoubleConverter::StringToIeee(
|
|||||||
|
|
||||||
if (allow_leading_spaces || allow_trailing_spaces) {
|
if (allow_leading_spaces || allow_trailing_spaces) {
|
||||||
if (!AdvanceToNonspace(¤t, end)) {
|
if (!AdvanceToNonspace(¤t, end)) {
|
||||||
*processed_characters_count = current - input;
|
*processed_characters_count = static_cast<int>(current - input);
|
||||||
return empty_string_value_;
|
return empty_string_value_;
|
||||||
}
|
}
|
||||||
if (!allow_leading_spaces && (input != current)) {
|
if (!allow_leading_spaces && (input != current)) {
|
||||||
@ -626,7 +684,7 @@ double StringToDoubleConverter::StringToIeee(
|
|||||||
if (*current == '+' || *current == '-') {
|
if (*current == '+' || *current == '-') {
|
||||||
sign = (*current == '-');
|
sign = (*current == '-');
|
||||||
++current;
|
++current;
|
||||||
const char* next_non_space = current;
|
Iterator next_non_space = current;
|
||||||
// Skip following spaces (if allowed).
|
// Skip following spaces (if allowed).
|
||||||
if (!AdvanceToNonspace(&next_non_space, end)) return junk_string_value_;
|
if (!AdvanceToNonspace(&next_non_space, end)) return junk_string_value_;
|
||||||
if (!allow_spaces_after_sign && (current != next_non_space)) {
|
if (!allow_spaces_after_sign && (current != next_non_space)) {
|
||||||
@ -649,7 +707,7 @@ double StringToDoubleConverter::StringToIeee(
|
|||||||
}
|
}
|
||||||
|
|
||||||
ASSERT(buffer_pos == 0);
|
ASSERT(buffer_pos == 0);
|
||||||
*processed_characters_count = current - input;
|
*processed_characters_count = static_cast<int>(current - input);
|
||||||
return sign ? -Double::Infinity() : Double::Infinity();
|
return sign ? -Double::Infinity() : Double::Infinity();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -668,7 +726,7 @@ double StringToDoubleConverter::StringToIeee(
|
|||||||
}
|
}
|
||||||
|
|
||||||
ASSERT(buffer_pos == 0);
|
ASSERT(buffer_pos == 0);
|
||||||
*processed_characters_count = current - input;
|
*processed_characters_count = static_cast<int>(current - input);
|
||||||
return sign ? -Double::NaN() : Double::NaN();
|
return sign ? -Double::NaN() : Double::NaN();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -677,7 +735,7 @@ double StringToDoubleConverter::StringToIeee(
|
|||||||
if (*current == '0') {
|
if (*current == '0') {
|
||||||
++current;
|
++current;
|
||||||
if (current == end) {
|
if (current == end) {
|
||||||
*processed_characters_count = current - input;
|
*processed_characters_count = static_cast<int>(current - input);
|
||||||
return SignedZero(sign);
|
return SignedZero(sign);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -690,17 +748,17 @@ double StringToDoubleConverter::StringToIeee(
|
|||||||
return junk_string_value_; // "0x".
|
return junk_string_value_; // "0x".
|
||||||
}
|
}
|
||||||
|
|
||||||
const char* tail_pointer = NULL;
|
bool result_is_junk;
|
||||||
double result = RadixStringToIeee<4>(current,
|
double result = RadixStringToIeee<4>(¤t,
|
||||||
end,
|
end,
|
||||||
sign,
|
sign,
|
||||||
allow_trailing_junk,
|
allow_trailing_junk,
|
||||||
junk_string_value_,
|
junk_string_value_,
|
||||||
read_as_double,
|
read_as_double,
|
||||||
&tail_pointer);
|
&result_is_junk);
|
||||||
if (tail_pointer != NULL) {
|
if (!result_is_junk) {
|
||||||
if (allow_trailing_spaces) AdvanceToNonspace(&tail_pointer, end);
|
if (allow_trailing_spaces) AdvanceToNonspace(¤t, end);
|
||||||
*processed_characters_count = tail_pointer - input;
|
*processed_characters_count = static_cast<int>(current - input);
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
@ -709,7 +767,7 @@ double StringToDoubleConverter::StringToIeee(
|
|||||||
while (*current == '0') {
|
while (*current == '0') {
|
||||||
++current;
|
++current;
|
||||||
if (current == end) {
|
if (current == end) {
|
||||||
*processed_characters_count = current - input;
|
*processed_characters_count = static_cast<int>(current - input);
|
||||||
return SignedZero(sign);
|
return SignedZero(sign);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -757,7 +815,7 @@ double StringToDoubleConverter::StringToIeee(
|
|||||||
while (*current == '0') {
|
while (*current == '0') {
|
||||||
++current;
|
++current;
|
||||||
if (current == end) {
|
if (current == end) {
|
||||||
*processed_characters_count = current - input;
|
*processed_characters_count = static_cast<int>(current - input);
|
||||||
return SignedZero(sign);
|
return SignedZero(sign);
|
||||||
}
|
}
|
||||||
exponent--; // Move this 0 into the exponent.
|
exponent--; // Move this 0 into the exponent.
|
||||||
@ -801,9 +859,9 @@ double StringToDoubleConverter::StringToIeee(
|
|||||||
return junk_string_value_;
|
return junk_string_value_;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
char sign = '+';
|
char exponen_sign = '+';
|
||||||
if (*current == '+' || *current == '-') {
|
if (*current == '+' || *current == '-') {
|
||||||
sign = static_cast<char>(*current);
|
exponen_sign = static_cast<char>(*current);
|
||||||
++current;
|
++current;
|
||||||
if (current == end) {
|
if (current == end) {
|
||||||
if (allow_trailing_junk) {
|
if (allow_trailing_junk) {
|
||||||
@ -837,7 +895,7 @@ double StringToDoubleConverter::StringToIeee(
|
|||||||
++current;
|
++current;
|
||||||
} while (current != end && *current >= '0' && *current <= '9');
|
} while (current != end && *current >= '0' && *current <= '9');
|
||||||
|
|
||||||
exponent += (sign == '-' ? -num : num);
|
exponent += (exponen_sign == '-' ? -num : num);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!(allow_trailing_spaces || allow_trailing_junk) && (current != end)) {
|
if (!(allow_trailing_spaces || allow_trailing_junk) && (current != end)) {
|
||||||
@ -855,16 +913,17 @@ double StringToDoubleConverter::StringToIeee(
|
|||||||
|
|
||||||
if (octal) {
|
if (octal) {
|
||||||
double result;
|
double result;
|
||||||
const char* tail_pointer = NULL;
|
bool result_is_junk;
|
||||||
result = RadixStringToIeee<3>(buffer,
|
char* start = buffer;
|
||||||
|
result = RadixStringToIeee<3>(&start,
|
||||||
buffer + buffer_pos,
|
buffer + buffer_pos,
|
||||||
sign,
|
sign,
|
||||||
allow_trailing_junk,
|
allow_trailing_junk,
|
||||||
junk_string_value_,
|
junk_string_value_,
|
||||||
read_as_double,
|
read_as_double,
|
||||||
&tail_pointer);
|
&result_is_junk);
|
||||||
ASSERT(tail_pointer != NULL);
|
ASSERT(!result_is_junk);
|
||||||
*processed_characters_count = current - input;
|
*processed_characters_count = static_cast<int>(current - input);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -882,8 +941,42 @@ double StringToDoubleConverter::StringToIeee(
|
|||||||
} else {
|
} else {
|
||||||
converted = Strtof(Vector<const char>(buffer, buffer_pos), exponent);
|
converted = Strtof(Vector<const char>(buffer, buffer_pos), exponent);
|
||||||
}
|
}
|
||||||
*processed_characters_count = current - input;
|
*processed_characters_count = static_cast<int>(current - input);
|
||||||
return sign? -converted: converted;
|
return sign? -converted: converted;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
double StringToDoubleConverter::StringToDouble(
|
||||||
|
const char* buffer,
|
||||||
|
int length,
|
||||||
|
int* processed_characters_count) const {
|
||||||
|
return StringToIeee(buffer, length, true, processed_characters_count);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
double StringToDoubleConverter::StringToDouble(
|
||||||
|
const uc16* buffer,
|
||||||
|
int length,
|
||||||
|
int* processed_characters_count) const {
|
||||||
|
return StringToIeee(buffer, length, true, processed_characters_count);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
float StringToDoubleConverter::StringToFloat(
|
||||||
|
const char* buffer,
|
||||||
|
int length,
|
||||||
|
int* processed_characters_count) const {
|
||||||
|
return static_cast<float>(StringToIeee(buffer, length, false,
|
||||||
|
processed_characters_count));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
float StringToDoubleConverter::StringToFloat(
|
||||||
|
const uc16* buffer,
|
||||||
|
int length,
|
||||||
|
int* processed_characters_count) const {
|
||||||
|
return static_cast<float>(StringToIeee(buffer, length, false,
|
||||||
|
processed_characters_count));
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace kenlm_double_conversion
|
} // namespace kenlm_double_conversion
|
||||||
|
@ -415,9 +415,10 @@ class StringToDoubleConverter {
|
|||||||
// junk, too.
|
// junk, too.
|
||||||
// - ALLOW_TRAILING_JUNK: ignore trailing characters that are not part of
|
// - ALLOW_TRAILING_JUNK: ignore trailing characters that are not part of
|
||||||
// a double literal.
|
// a double literal.
|
||||||
// - ALLOW_LEADING_SPACES: skip over leading spaces.
|
// - ALLOW_LEADING_SPACES: skip over leading whitespace, including spaces,
|
||||||
// - ALLOW_TRAILING_SPACES: ignore trailing spaces.
|
// new-lines, and tabs.
|
||||||
// - ALLOW_SPACES_AFTER_SIGN: ignore spaces after the sign.
|
// - ALLOW_TRAILING_SPACES: ignore trailing whitespace.
|
||||||
|
// - ALLOW_SPACES_AFTER_SIGN: ignore whitespace after the sign.
|
||||||
// Ex: StringToDouble("- 123.2") -> -123.2.
|
// Ex: StringToDouble("- 123.2") -> -123.2.
|
||||||
// StringToDouble("+ 123.2") -> 123.2
|
// StringToDouble("+ 123.2") -> 123.2
|
||||||
//
|
//
|
||||||
@ -502,19 +503,24 @@ class StringToDoubleConverter {
|
|||||||
// in the 'processed_characters_count'. Trailing junk is never included.
|
// in the 'processed_characters_count'. Trailing junk is never included.
|
||||||
double StringToDouble(const char* buffer,
|
double StringToDouble(const char* buffer,
|
||||||
int length,
|
int length,
|
||||||
int* processed_characters_count) const {
|
int* processed_characters_count) const;
|
||||||
return StringToIeee(buffer, length, processed_characters_count, true);
|
|
||||||
}
|
// Same as StringToDouble above but for 16 bit characters.
|
||||||
|
double StringToDouble(const uc16* buffer,
|
||||||
|
int length,
|
||||||
|
int* processed_characters_count) const;
|
||||||
|
|
||||||
// Same as StringToDouble but reads a float.
|
// Same as StringToDouble but reads a float.
|
||||||
// Note that this is not equivalent to static_cast<float>(StringToDouble(...))
|
// Note that this is not equivalent to static_cast<float>(StringToDouble(...))
|
||||||
// due to potential double-rounding.
|
// due to potential double-rounding.
|
||||||
float StringToFloat(const char* buffer,
|
float StringToFloat(const char* buffer,
|
||||||
int length,
|
int length,
|
||||||
int* processed_characters_count) const {
|
int* processed_characters_count) const;
|
||||||
return static_cast<float>(StringToIeee(buffer, length,
|
|
||||||
processed_characters_count, false));
|
// Same as StringToFloat above but for 16 bit characters.
|
||||||
}
|
float StringToFloat(const uc16* buffer,
|
||||||
|
int length,
|
||||||
|
int* processed_characters_count) const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
const int flags_;
|
const int flags_;
|
||||||
@ -523,10 +529,11 @@ class StringToDoubleConverter {
|
|||||||
const char* const infinity_symbol_;
|
const char* const infinity_symbol_;
|
||||||
const char* const nan_symbol_;
|
const char* const nan_symbol_;
|
||||||
|
|
||||||
double StringToIeee(const char* buffer,
|
template <class Iterator>
|
||||||
|
double StringToIeee(Iterator start_pointer,
|
||||||
int length,
|
int length,
|
||||||
int* processed_characters_count,
|
bool read_as_double,
|
||||||
bool read_as_double) const;
|
int* processed_characters_count) const;
|
||||||
|
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(StringToDoubleConverter);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(StringToDoubleConverter);
|
||||||
};
|
};
|
||||||
|
@ -248,10 +248,7 @@ static void BiggestPowerTen(uint32_t number,
|
|||||||
// Note: kPowersOf10[i] == 10^(i-1).
|
// Note: kPowersOf10[i] == 10^(i-1).
|
||||||
exponent_plus_one_guess++;
|
exponent_plus_one_guess++;
|
||||||
// We don't have any guarantees that 2^number_bits <= number.
|
// We don't have any guarantees that 2^number_bits <= number.
|
||||||
// TODO(floitsch): can we change the 'while' into an 'if'? We definitely see
|
if (number < kSmallPowersOfTen[exponent_plus_one_guess]) {
|
||||||
// number < (2^number_bits - 1), but I haven't encountered
|
|
||||||
// number < (2^number_bits - 2) yet.
|
|
||||||
while (number < kSmallPowersOfTen[exponent_plus_one_guess]) {
|
|
||||||
exponent_plus_one_guess--;
|
exponent_plus_one_guess--;
|
||||||
}
|
}
|
||||||
*power = kSmallPowersOfTen[exponent_plus_one_guess];
|
*power = kSmallPowersOfTen[exponent_plus_one_guess];
|
||||||
@ -350,7 +347,8 @@ static bool DigitGen(DiyFp low,
|
|||||||
// that is smaller than integrals.
|
// that is smaller than integrals.
|
||||||
while (*kappa > 0) {
|
while (*kappa > 0) {
|
||||||
int digit = integrals / divisor;
|
int digit = integrals / divisor;
|
||||||
buffer[*length] = '0' + digit;
|
ASSERT(digit <= 9);
|
||||||
|
buffer[*length] = static_cast<char>('0' + digit);
|
||||||
(*length)++;
|
(*length)++;
|
||||||
integrals %= divisor;
|
integrals %= divisor;
|
||||||
(*kappa)--;
|
(*kappa)--;
|
||||||
@ -379,13 +377,14 @@ static bool DigitGen(DiyFp low,
|
|||||||
ASSERT(one.e() >= -60);
|
ASSERT(one.e() >= -60);
|
||||||
ASSERT(fractionals < one.f());
|
ASSERT(fractionals < one.f());
|
||||||
ASSERT(UINT64_2PART_C(0xFFFFFFFF, FFFFFFFF) / 10 >= one.f());
|
ASSERT(UINT64_2PART_C(0xFFFFFFFF, FFFFFFFF) / 10 >= one.f());
|
||||||
while (true) {
|
for (;;) {
|
||||||
fractionals *= 10;
|
fractionals *= 10;
|
||||||
unit *= 10;
|
unit *= 10;
|
||||||
unsafe_interval.set_f(unsafe_interval.f() * 10);
|
unsafe_interval.set_f(unsafe_interval.f() * 10);
|
||||||
// Integer division by one.
|
// Integer division by one.
|
||||||
int digit = static_cast<int>(fractionals >> -one.e());
|
int digit = static_cast<int>(fractionals >> -one.e());
|
||||||
buffer[*length] = '0' + digit;
|
ASSERT(digit <= 9);
|
||||||
|
buffer[*length] = static_cast<char>('0' + digit);
|
||||||
(*length)++;
|
(*length)++;
|
||||||
fractionals &= one.f() - 1; // Modulo by one.
|
fractionals &= one.f() - 1; // Modulo by one.
|
||||||
(*kappa)--;
|
(*kappa)--;
|
||||||
@ -459,7 +458,8 @@ static bool DigitGenCounted(DiyFp w,
|
|||||||
// that is smaller than 'integrals'.
|
// that is smaller than 'integrals'.
|
||||||
while (*kappa > 0) {
|
while (*kappa > 0) {
|
||||||
int digit = integrals / divisor;
|
int digit = integrals / divisor;
|
||||||
buffer[*length] = '0' + digit;
|
ASSERT(digit <= 9);
|
||||||
|
buffer[*length] = static_cast<char>('0' + digit);
|
||||||
(*length)++;
|
(*length)++;
|
||||||
requested_digits--;
|
requested_digits--;
|
||||||
integrals %= divisor;
|
integrals %= divisor;
|
||||||
@ -492,7 +492,8 @@ static bool DigitGenCounted(DiyFp w,
|
|||||||
w_error *= 10;
|
w_error *= 10;
|
||||||
// Integer division by one.
|
// Integer division by one.
|
||||||
int digit = static_cast<int>(fractionals >> -one.e());
|
int digit = static_cast<int>(fractionals >> -one.e());
|
||||||
buffer[*length] = '0' + digit;
|
ASSERT(digit <= 9);
|
||||||
|
buffer[*length] = static_cast<char>('0' + digit);
|
||||||
(*length)++;
|
(*length)++;
|
||||||
requested_digits--;
|
requested_digits--;
|
||||||
fractionals &= one.f() - 1; // Modulo by one.
|
fractionals &= one.f() - 1; // Modulo by one.
|
||||||
|
@ -25,7 +25,7 @@
|
|||||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#include <cmath>
|
#include <math.h>
|
||||||
|
|
||||||
#include "fixed-dtoa.h"
|
#include "fixed-dtoa.h"
|
||||||
#include "ieee.h"
|
#include "ieee.h"
|
||||||
@ -98,7 +98,7 @@ class UInt128 {
|
|||||||
return high_bits_ == 0 && low_bits_ == 0;
|
return high_bits_ == 0 && low_bits_ == 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
int BitAt(int position) {
|
int BitAt(int position) const {
|
||||||
if (position >= 64) {
|
if (position >= 64) {
|
||||||
return static_cast<int>(high_bits_ >> (position - 64)) & 1;
|
return static_cast<int>(high_bits_ >> (position - 64)) & 1;
|
||||||
} else {
|
} else {
|
||||||
@ -133,7 +133,7 @@ static void FillDigits32(uint32_t number, Vector<char> buffer, int* length) {
|
|||||||
while (number != 0) {
|
while (number != 0) {
|
||||||
int digit = number % 10;
|
int digit = number % 10;
|
||||||
number /= 10;
|
number /= 10;
|
||||||
buffer[(*length) + number_length] = '0' + digit;
|
buffer[(*length) + number_length] = static_cast<char>('0' + digit);
|
||||||
number_length++;
|
number_length++;
|
||||||
}
|
}
|
||||||
// Exchange the digits.
|
// Exchange the digits.
|
||||||
@ -150,7 +150,7 @@ static void FillDigits32(uint32_t number, Vector<char> buffer, int* length) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static void FillDigits64FixedLength(uint64_t number, int requested_length,
|
static void FillDigits64FixedLength(uint64_t number,
|
||||||
Vector<char> buffer, int* length) {
|
Vector<char> buffer, int* length) {
|
||||||
const uint32_t kTen7 = 10000000;
|
const uint32_t kTen7 = 10000000;
|
||||||
// For efficiency cut the number into 3 uint32_t parts, and print those.
|
// For efficiency cut the number into 3 uint32_t parts, and print those.
|
||||||
@ -253,12 +253,14 @@ static void FillFractionals(uint64_t fractionals, int exponent,
|
|||||||
fractionals *= 5;
|
fractionals *= 5;
|
||||||
point--;
|
point--;
|
||||||
int digit = static_cast<int>(fractionals >> point);
|
int digit = static_cast<int>(fractionals >> point);
|
||||||
buffer[*length] = '0' + digit;
|
ASSERT(digit <= 9);
|
||||||
|
buffer[*length] = static_cast<char>('0' + digit);
|
||||||
(*length)++;
|
(*length)++;
|
||||||
fractionals -= static_cast<uint64_t>(digit) << point;
|
fractionals -= static_cast<uint64_t>(digit) << point;
|
||||||
}
|
}
|
||||||
// If the first bit after the point is set we have to round up.
|
// If the first bit after the point is set we have to round up.
|
||||||
if (((fractionals >> (point - 1)) & 1) == 1) {
|
ASSERT(fractionals == 0 || point - 1 >= 0);
|
||||||
|
if ((fractionals != 0) && ((fractionals >> (point - 1)) & 1) == 1) {
|
||||||
RoundUp(buffer, length, decimal_point);
|
RoundUp(buffer, length, decimal_point);
|
||||||
}
|
}
|
||||||
} else { // We need 128 bits.
|
} else { // We need 128 bits.
|
||||||
@ -274,7 +276,8 @@ static void FillFractionals(uint64_t fractionals, int exponent,
|
|||||||
fractionals128.Multiply(5);
|
fractionals128.Multiply(5);
|
||||||
point--;
|
point--;
|
||||||
int digit = fractionals128.DivModPowerOf2(point);
|
int digit = fractionals128.DivModPowerOf2(point);
|
||||||
buffer[*length] = '0' + digit;
|
ASSERT(digit <= 9);
|
||||||
|
buffer[*length] = static_cast<char>('0' + digit);
|
||||||
(*length)++;
|
(*length)++;
|
||||||
}
|
}
|
||||||
if (fractionals128.BitAt(point - 1) == 1) {
|
if (fractionals128.BitAt(point - 1) == 1) {
|
||||||
@ -358,7 +361,7 @@ bool FastFixedDtoa(double v,
|
|||||||
remainder = (dividend % divisor) << exponent;
|
remainder = (dividend % divisor) << exponent;
|
||||||
}
|
}
|
||||||
FillDigits32(quotient, buffer, length);
|
FillDigits32(quotient, buffer, length);
|
||||||
FillDigits64FixedLength(remainder, divisor_power, buffer, length);
|
FillDigits64FixedLength(remainder, buffer, length);
|
||||||
*decimal_point = *length;
|
*decimal_point = *length;
|
||||||
} else if (exponent >= 0) {
|
} else if (exponent >= 0) {
|
||||||
// 0 <= exponent <= 11
|
// 0 <= exponent <= 11
|
||||||
|
@ -99,7 +99,7 @@ class Double {
|
|||||||
}
|
}
|
||||||
|
|
||||||
double PreviousDouble() const {
|
double PreviousDouble() const {
|
||||||
if (d64_ == (kInfinity | kSignMask)) return -Double::Infinity();
|
if (d64_ == (kInfinity | kSignMask)) return -Infinity();
|
||||||
if (Sign() < 0) {
|
if (Sign() < 0) {
|
||||||
return Double(d64_ + 1).value();
|
return Double(d64_ + 1).value();
|
||||||
} else {
|
} else {
|
||||||
@ -256,6 +256,8 @@ class Double {
|
|||||||
return (significand & kSignificandMask) |
|
return (significand & kSignificandMask) |
|
||||||
(biased_exponent << kPhysicalSignificandSize);
|
(biased_exponent << kPhysicalSignificandSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
DISALLOW_COPY_AND_ASSIGN(Double);
|
||||||
};
|
};
|
||||||
|
|
||||||
class Single {
|
class Single {
|
||||||
@ -391,6 +393,8 @@ class Single {
|
|||||||
static const uint32_t kNaN = 0x7FC00000;
|
static const uint32_t kNaN = 0x7FC00000;
|
||||||
|
|
||||||
const uint32_t d32_;
|
const uint32_t d32_;
|
||||||
|
|
||||||
|
DISALLOW_COPY_AND_ASSIGN(Single);
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace kenlm_double_conversion
|
} // namespace kenlm_double_conversion
|
||||||
|
@ -25,8 +25,8 @@
|
|||||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#include <cstdarg>
|
#include <stdarg.h>
|
||||||
#include <climits>
|
#include <limits.h>
|
||||||
|
|
||||||
#include "strtod.h"
|
#include "strtod.h"
|
||||||
#include "bignum.h"
|
#include "bignum.h"
|
||||||
@ -137,6 +137,7 @@ static void TrimAndCut(Vector<const char> buffer, int exponent,
|
|||||||
Vector<const char> right_trimmed = TrimTrailingZeros(left_trimmed);
|
Vector<const char> right_trimmed = TrimTrailingZeros(left_trimmed);
|
||||||
exponent += left_trimmed.length() - right_trimmed.length();
|
exponent += left_trimmed.length() - right_trimmed.length();
|
||||||
if (right_trimmed.length() > kMaxSignificantDecimalDigits) {
|
if (right_trimmed.length() > kMaxSignificantDecimalDigits) {
|
||||||
|
(void) space_size; // Mark variable as used.
|
||||||
ASSERT(space_size >= kMaxSignificantDecimalDigits);
|
ASSERT(space_size >= kMaxSignificantDecimalDigits);
|
||||||
CutToMaxSignificantDigits(right_trimmed, exponent,
|
CutToMaxSignificantDigits(right_trimmed, exponent,
|
||||||
buffer_copy_space, updated_exponent);
|
buffer_copy_space, updated_exponent);
|
||||||
@ -263,7 +264,6 @@ static DiyFp AdjustmentPowerOfTen(int exponent) {
|
|||||||
case 7: return DiyFp(UINT64_2PART_C(0x98968000, 00000000), -40);
|
case 7: return DiyFp(UINT64_2PART_C(0x98968000, 00000000), -40);
|
||||||
default:
|
default:
|
||||||
UNREACHABLE();
|
UNREACHABLE();
|
||||||
return DiyFp(0, 0);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -286,7 +286,7 @@ static bool DiyFpStrtod(Vector<const char> buffer,
|
|||||||
const int kDenominator = 1 << kDenominatorLog;
|
const int kDenominator = 1 << kDenominatorLog;
|
||||||
// Move the remaining decimals into the exponent.
|
// Move the remaining decimals into the exponent.
|
||||||
exponent += remaining_decimals;
|
exponent += remaining_decimals;
|
||||||
int error = (remaining_decimals == 0 ? 0 : kDenominator / 2);
|
uint64_t error = (remaining_decimals == 0 ? 0 : kDenominator / 2);
|
||||||
|
|
||||||
int old_e = input.e();
|
int old_e = input.e();
|
||||||
input.Normalize();
|
input.Normalize();
|
||||||
@ -506,9 +506,7 @@ float Strtof(Vector<const char> buffer, int exponent) {
|
|||||||
double double_previous = Double(double_guess).PreviousDouble();
|
double double_previous = Double(double_guess).PreviousDouble();
|
||||||
|
|
||||||
float f1 = static_cast<float>(double_previous);
|
float f1 = static_cast<float>(double_previous);
|
||||||
#ifndef NDEBUG
|
|
||||||
float f2 = float_guess;
|
float f2 = float_guess;
|
||||||
#endif
|
|
||||||
float f3 = static_cast<float>(double_next);
|
float f3 = static_cast<float>(double_next);
|
||||||
float f4;
|
float f4;
|
||||||
if (is_correct) {
|
if (is_correct) {
|
||||||
@ -517,9 +515,8 @@ float Strtof(Vector<const char> buffer, int exponent) {
|
|||||||
double double_next2 = Double(double_next).NextDouble();
|
double double_next2 = Double(double_next).NextDouble();
|
||||||
f4 = static_cast<float>(double_next2);
|
f4 = static_cast<float>(double_next2);
|
||||||
}
|
}
|
||||||
#ifndef NDEBUG
|
(void) f2; // Mark variable as used.
|
||||||
ASSERT(f1 <= f2 && f2 <= f3 && f3 <= f4);
|
ASSERT(f1 <= f2 && f2 <= f3 && f3 <= f4);
|
||||||
#endif
|
|
||||||
|
|
||||||
// If the guess doesn't lie near a single-precision boundary we can simply
|
// If the guess doesn't lie near a single-precision boundary we can simply
|
||||||
// return its float-value.
|
// return its float-value.
|
||||||
|
@ -33,14 +33,29 @@
|
|||||||
|
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
#ifndef ASSERT
|
#ifndef ASSERT
|
||||||
#define ASSERT(condition) (assert(condition))
|
#define ASSERT(condition) \
|
||||||
|
assert(condition);
|
||||||
#endif
|
#endif
|
||||||
#ifndef UNIMPLEMENTED
|
#ifndef UNIMPLEMENTED
|
||||||
#define UNIMPLEMENTED() (abort())
|
#define UNIMPLEMENTED() (abort())
|
||||||
#endif
|
#endif
|
||||||
|
#ifndef DOUBLE_CONVERSION_NO_RETURN
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
#define DOUBLE_CONVERSION_NO_RETURN __declspec(noreturn)
|
||||||
|
#else
|
||||||
|
#define DOUBLE_CONVERSION_NO_RETURN __attribute__((noreturn))
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
#ifndef UNREACHABLE
|
#ifndef UNREACHABLE
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
void DOUBLE_CONVERSION_NO_RETURN abort_noreturn();
|
||||||
|
inline void abort_noreturn() { abort(); }
|
||||||
|
#define UNREACHABLE() (abort_noreturn())
|
||||||
|
#else
|
||||||
#define UNREACHABLE() (abort())
|
#define UNREACHABLE() (abort())
|
||||||
#endif
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
// Double operations detection based on target architecture.
|
// Double operations detection based on target architecture.
|
||||||
// Linux uses a 80bit wide floating point stack on x86. This induces double
|
// Linux uses a 80bit wide floating point stack on x86. This induces double
|
||||||
@ -55,11 +70,17 @@
|
|||||||
#if defined(_M_X64) || defined(__x86_64__) || \
|
#if defined(_M_X64) || defined(__x86_64__) || \
|
||||||
defined(__ARMEL__) || defined(__avr32__) || \
|
defined(__ARMEL__) || defined(__avr32__) || \
|
||||||
defined(__hppa__) || defined(__ia64__) || \
|
defined(__hppa__) || defined(__ia64__) || \
|
||||||
defined(__mips__) || defined(__powerpc__) || \
|
defined(__mips__) || \
|
||||||
|
defined(__powerpc__) || defined(__ppc__) || defined(__ppc64__) || \
|
||||||
|
defined(_POWER) || defined(_ARCH_PPC) || defined(_ARCH_PPC64) || \
|
||||||
defined(__sparc__) || defined(__sparc) || defined(__s390__) || \
|
defined(__sparc__) || defined(__sparc) || defined(__s390__) || \
|
||||||
defined(__SH4__) || defined(__alpha__) || \
|
defined(__SH4__) || defined(__alpha__) || \
|
||||||
defined(_MIPS_ARCH_MIPS32R2) || defined(__aarch64__)
|
defined(_MIPS_ARCH_MIPS32R2) || \
|
||||||
|
defined(__AARCH64EL__) || defined(__aarch64__) || \
|
||||||
|
defined(__riscv)
|
||||||
#define DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS 1
|
#define DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS 1
|
||||||
|
#elif defined(__mc68000__)
|
||||||
|
#undef DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS
|
||||||
#elif defined(_M_IX86) || defined(__i386__) || defined(__i386)
|
#elif defined(_M_IX86) || defined(__i386__) || defined(__i386)
|
||||||
#if defined(_WIN32)
|
#if defined(_WIN32)
|
||||||
// Windows uses a 64bit wide floating point stack.
|
// Windows uses a 64bit wide floating point stack.
|
||||||
@ -71,6 +92,11 @@
|
|||||||
#error Target architecture was not detected as supported by Double-Conversion.
|
#error Target architecture was not detected as supported by Double-Conversion.
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if defined(__GNUC__)
|
||||||
|
#define DOUBLE_CONVERSION_UNUSED __attribute__((unused))
|
||||||
|
#else
|
||||||
|
#define DOUBLE_CONVERSION_UNUSED
|
||||||
|
#endif
|
||||||
|
|
||||||
#if defined(_WIN32) && !defined(__MINGW32__)
|
#if defined(_WIN32) && !defined(__MINGW32__)
|
||||||
|
|
||||||
@ -90,6 +116,8 @@ typedef unsigned __int64 uint64_t;
|
|||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
typedef uint16_t uc16;
|
||||||
|
|
||||||
// The following macro works on both 32 and 64-bit platforms.
|
// The following macro works on both 32 and 64-bit platforms.
|
||||||
// Usage: instead of writing 0x1234567890123456
|
// Usage: instead of writing 0x1234567890123456
|
||||||
// write UINT64_2PART_C(0x12345678,90123456);
|
// write UINT64_2PART_C(0x12345678,90123456);
|
||||||
@ -155,8 +183,8 @@ template <typename T>
|
|||||||
class Vector {
|
class Vector {
|
||||||
public:
|
public:
|
||||||
Vector() : start_(NULL), length_(0) {}
|
Vector() : start_(NULL), length_(0) {}
|
||||||
Vector(T* data, int length) : start_(data), length_(length) {
|
Vector(T* data, int len) : start_(data), length_(len) {
|
||||||
ASSERT(length == 0 || (length > 0 && data != NULL));
|
ASSERT(len == 0 || (len > 0 && data != NULL));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns a vector using the same backing storage as this one,
|
// Returns a vector using the same backing storage as this one,
|
||||||
@ -198,8 +226,8 @@ class Vector {
|
|||||||
// buffer bounds on all operations in debug mode.
|
// buffer bounds on all operations in debug mode.
|
||||||
class StringBuilder {
|
class StringBuilder {
|
||||||
public:
|
public:
|
||||||
StringBuilder(char* buffer, int size)
|
StringBuilder(char* buffer, int buffer_size)
|
||||||
: buffer_(buffer, size), position_(0) { }
|
: buffer_(buffer, buffer_size), position_(0) { }
|
||||||
|
|
||||||
~StringBuilder() { if (!is_finalized()) Finalize(); }
|
~StringBuilder() { if (!is_finalized()) Finalize(); }
|
||||||
|
|
||||||
@ -218,8 +246,7 @@ class StringBuilder {
|
|||||||
// 0-characters; use the Finalize() method to terminate the string
|
// 0-characters; use the Finalize() method to terminate the string
|
||||||
// instead.
|
// instead.
|
||||||
void AddCharacter(char c) {
|
void AddCharacter(char c) {
|
||||||
// I just extract raw data not a cstr so null is fine.
|
ASSERT(c != '\0');
|
||||||
//ASSERT(c != '\0');
|
|
||||||
ASSERT(!is_finalized() && position_ < buffer_.length());
|
ASSERT(!is_finalized() && position_ < buffer_.length());
|
||||||
buffer_[position_++] = c;
|
buffer_[position_++] = c;
|
||||||
}
|
}
|
||||||
@ -234,8 +261,7 @@ class StringBuilder {
|
|||||||
// builder. The input string must have enough characters.
|
// builder. The input string must have enough characters.
|
||||||
void AddSubstring(const char* s, int n) {
|
void AddSubstring(const char* s, int n) {
|
||||||
ASSERT(!is_finalized() && position_ + n < buffer_.length());
|
ASSERT(!is_finalized() && position_ + n < buffer_.length());
|
||||||
// I just extract raw data not a cstr so null is fine.
|
ASSERT(static_cast<size_t>(n) <= strlen(s));
|
||||||
//ASSERT(static_cast<size_t>(n) <= strlen(s));
|
|
||||||
memmove(&buffer_[position_], s, n * kCharSize);
|
memmove(&buffer_[position_], s, n * kCharSize);
|
||||||
position_ += n;
|
position_ += n;
|
||||||
}
|
}
|
||||||
@ -255,8 +281,7 @@ class StringBuilder {
|
|||||||
buffer_[position_] = '\0';
|
buffer_[position_] = '\0';
|
||||||
// Make sure nobody managed to add a 0-character to the
|
// Make sure nobody managed to add a 0-character to the
|
||||||
// buffer while building the string.
|
// buffer while building the string.
|
||||||
// I just extract raw data not a cstr so null is fine.
|
ASSERT(strlen(buffer_.start()) == static_cast<size_t>(position_));
|
||||||
//ASSERT(strlen(buffer_.start()) == static_cast<size_t>(position_));
|
|
||||||
position_ = -1;
|
position_ = -1;
|
||||||
ASSERT(is_finalized());
|
ASSERT(is_finalized());
|
||||||
return buffer_.start();
|
return buffer_.start();
|
||||||
@ -299,11 +324,8 @@ template <class Dest, class Source>
|
|||||||
inline Dest BitCast(const Source& source) {
|
inline Dest BitCast(const Source& source) {
|
||||||
// Compile time assertion: sizeof(Dest) == sizeof(Source)
|
// Compile time assertion: sizeof(Dest) == sizeof(Source)
|
||||||
// A compile error here means your Dest and Source have different sizes.
|
// A compile error here means your Dest and Source have different sizes.
|
||||||
typedef char VerifySizesAreEqual[sizeof(Dest) == sizeof(Source) ? 1 : -1]
|
DOUBLE_CONVERSION_UNUSED
|
||||||
#if __GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 8
|
typedef char VerifySizesAreEqual[sizeof(Dest) == sizeof(Source) ? 1 : -1];
|
||||||
__attribute__((unused))
|
|
||||||
#endif
|
|
||||||
;
|
|
||||||
|
|
||||||
Dest dest;
|
Dest dest;
|
||||||
memmove(&dest, &source, sizeof(dest));
|
memmove(&dest, &source, sizeof(dest));
|
||||||
|
@ -134,7 +134,7 @@ class OverflowException : public Exception {
|
|||||||
|
|
||||||
template <unsigned len> inline std::size_t CheckOverflowInternal(uint64_t value) {
|
template <unsigned len> inline std::size_t CheckOverflowInternal(uint64_t value) {
|
||||||
UTIL_THROW_IF(value > static_cast<uint64_t>(std::numeric_limits<std::size_t>::max()), OverflowException, "Integer overflow detected. This model is too big for 32-bit code.");
|
UTIL_THROW_IF(value > static_cast<uint64_t>(std::numeric_limits<std::size_t>::max()), OverflowException, "Integer overflow detected. This model is too big for 32-bit code.");
|
||||||
return value;
|
return static_cast<std::size_t>(value);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <> inline std::size_t CheckOverflowInternal<8>(uint64_t value) {
|
template <> inline std::size_t CheckOverflowInternal<8>(uint64_t value) {
|
||||||
|
@ -490,7 +490,7 @@ int
|
|||||||
mkstemp_and_unlink(char *tmpl) {
|
mkstemp_and_unlink(char *tmpl) {
|
||||||
int ret = mkstemp(tmpl);
|
int ret = mkstemp(tmpl);
|
||||||
if (ret != -1) {
|
if (ret != -1) {
|
||||||
UTIL_THROW_IF(unlink(tmpl), ErrnoException, "while deleting delete " << tmpl);
|
UTIL_THROW_IF(unlink(tmpl), ErrnoException, "while deleting " << tmpl);
|
||||||
}
|
}
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
@ -103,7 +103,7 @@ class FilePiece {
|
|||||||
if (position_ == position_end_) {
|
if (position_ == position_end_) {
|
||||||
try {
|
try {
|
||||||
Shift();
|
Shift();
|
||||||
} catch (const util::EndOfFileException &e) { return false; }
|
} catch (const util::EndOfFileException &) { return false; }
|
||||||
// And break out at end of file.
|
// And break out at end of file.
|
||||||
if (position_ == position_end_) return false;
|
if (position_ == position_end_) return false;
|
||||||
}
|
}
|
||||||
|
@ -142,7 +142,7 @@ void UnmapOrThrow(void *start, size_t length) {
|
|||||||
#if defined(_WIN32) || defined(_WIN64)
|
#if defined(_WIN32) || defined(_WIN64)
|
||||||
UTIL_THROW_IF(!::UnmapViewOfFile(start), ErrnoException, "Failed to unmap a file");
|
UTIL_THROW_IF(!::UnmapViewOfFile(start), ErrnoException, "Failed to unmap a file");
|
||||||
#else
|
#else
|
||||||
UTIL_THROW_IF(munmap(start, length), ErrnoException, "munmap failed");
|
UTIL_THROW_IF(munmap(start, length), ErrnoException, "munmap failed with " << start << " for length " << length);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -30,7 +30,7 @@ class DivMod {
|
|||||||
public:
|
public:
|
||||||
explicit DivMod(std::size_t buckets) : buckets_(buckets) {}
|
explicit DivMod(std::size_t buckets) : buckets_(buckets) {}
|
||||||
|
|
||||||
static std::size_t RoundBuckets(std::size_t from) {
|
static uint64_t RoundBuckets(uint64_t from) {
|
||||||
return from;
|
return from;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -58,7 +58,7 @@ class Power2Mod {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Round up to next power of 2.
|
// Round up to next power of 2.
|
||||||
static std::size_t RoundBuckets(std::size_t from) {
|
static uint64_t RoundBuckets(uint64_t from) {
|
||||||
--from;
|
--from;
|
||||||
from |= from >> 1;
|
from |= from >> 1;
|
||||||
from |= from >> 2;
|
from |= from >> 2;
|
||||||
|
@ -5,10 +5,9 @@
|
|||||||
#include "util/spaces.hh"
|
#include "util/spaces.hh"
|
||||||
#include "util/string_piece.hh"
|
#include "util/string_piece.hh"
|
||||||
|
|
||||||
#include <boost/iterator/iterator_facade.hpp>
|
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
|
#include <iterator>
|
||||||
|
|
||||||
namespace util {
|
namespace util {
|
||||||
|
|
||||||
@ -97,12 +96,12 @@ class AnyCharacterLast {
|
|||||||
StringPiece chars_;
|
StringPiece chars_;
|
||||||
};
|
};
|
||||||
|
|
||||||
template <class Find, bool SkipEmpty = false> class TokenIter : public boost::iterator_facade<TokenIter<Find, SkipEmpty>, const StringPiece, boost::forward_traversal_tag> {
|
template <class Find, bool SkipEmpty = false> class TokenIter : public std::iterator<std::forward_iterator_tag, const StringPiece, std::ptrdiff_t, const StringPiece *, const StringPiece &> {
|
||||||
public:
|
public:
|
||||||
TokenIter() {}
|
TokenIter() {}
|
||||||
|
|
||||||
template <class Construct> TokenIter(const StringPiece &str, const Construct &construct) : after_(str), finder_(construct) {
|
template <class Construct> TokenIter(const StringPiece &str, const Construct &construct) : after_(str), finder_(construct) {
|
||||||
increment();
|
++*this;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool operator!() const {
|
bool operator!() const {
|
||||||
@ -116,10 +115,15 @@ template <class Find, bool SkipEmpty = false> class TokenIter : public boost::it
|
|||||||
return TokenIter<Find, SkipEmpty>();
|
return TokenIter<Find, SkipEmpty>();
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
bool operator==(const TokenIter<Find, SkipEmpty> &other) const {
|
||||||
friend class boost::iterator_core_access;
|
return current_.data() == other.current_.data();
|
||||||
|
}
|
||||||
|
|
||||||
void increment() {
|
bool operator!=(const TokenIter<Find, SkipEmpty> &other) const {
|
||||||
|
return !(*this == other);
|
||||||
|
}
|
||||||
|
|
||||||
|
TokenIter<Find, SkipEmpty> &operator++() {
|
||||||
do {
|
do {
|
||||||
StringPiece found(finder_.Find(after_));
|
StringPiece found(finder_.Find(after_));
|
||||||
current_ = StringPiece(after_.data(), found.data() - after_.data());
|
current_ = StringPiece(after_.data(), found.data() - after_.data());
|
||||||
@ -129,17 +133,25 @@ template <class Find, bool SkipEmpty = false> class TokenIter : public boost::it
|
|||||||
after_ = StringPiece(found.data() + found.size(), after_.data() - found.data() + after_.size() - found.size());
|
after_ = StringPiece(found.data() + found.size(), after_.data() - found.data() + after_.size() - found.size());
|
||||||
}
|
}
|
||||||
} while (SkipEmpty && current_.data() && current_.empty()); // Compiler should optimize this away if SkipEmpty is false.
|
} while (SkipEmpty && current_.data() && current_.empty()); // Compiler should optimize this away if SkipEmpty is false.
|
||||||
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool equal(const TokenIter<Find, SkipEmpty> &other) const {
|
TokenIter<Find, SkipEmpty> &operator++(int) {
|
||||||
return current_.data() == other.current_.data();
|
TokenIter<Find, SkipEmpty> ret(*this);
|
||||||
|
++*this;
|
||||||
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
const StringPiece &dereference() const {
|
const StringPiece &operator*() const {
|
||||||
UTIL_THROW_IF(!current_.data(), OutOfTokens, "Ran out of tokens");
|
UTIL_THROW_IF(!current_.data(), OutOfTokens, "Ran out of tokens");
|
||||||
return current_;
|
return current_;
|
||||||
}
|
}
|
||||||
|
const StringPiece *operator->() const {
|
||||||
|
UTIL_THROW_IF(!current_.data(), OutOfTokens, "Ran out of tokens");
|
||||||
|
return &current_;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
StringPiece current_;
|
StringPiece current_;
|
||||||
StringPiece after_;
|
StringPiece after_;
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user