1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
use std::sync::Mutex;
use std::ffi::{CString, CStr};
use std::str::from_utf8;
use std::default::Default;
use std::ptr::{null, null_mut};
use libc::{c_int, c_double, c_char};
use ffi::{CLDHints, Encoding, CLD2_ExtDetectLanguageSummary4,
CLD2_DetectLanguageVersion};
use ffi::Language as LanguageId;
use language::LanguageIdExt;
use types::*;
// Process-wide lock that `detector_version` holds around its call to
// `CLD2_DetectLanguageVersion`.  The `u8` payload is a placeholder: the
// code in this file only uses the mutex for mutual exclusion and never
// reads or writes the guarded value.
lazy_static! {
static ref CLD2_VERSION_LOCK: Mutex<u8> = Mutex::new(0);
}
/// Return the version string reported by the underlying CLD2 library.
///
/// The call into CLD2 is made while `CLD2_VERSION_LOCK` is held, so only
/// one thread at a time runs it (presumably because the C function is not
/// safe to call concurrently -- confirm against the CLD2 sources).
///
/// # Panics
///
/// Panics if CLD2 returns a null pointer or if the returned bytes are not
/// valid UTF-8.
pub fn detector_version() -> String {
    // Keep the lock result alive until the end of the function.  Even a
    // poisoned result still carries the guard, so exclusion is maintained
    // either way; the binding is dropped only after the text is copied.
    let _held = CLD2_VERSION_LOCK.lock();

    unsafe {
        let version_string = CLD2_DetectLanguageVersion();
        assert!(!version_string.is_null());

        // Copy the C string into an owned `String` before the lock is
        // released.
        let raw = CStr::from_ptr(version_string);
        String::from(from_utf8(raw.to_bytes()).unwrap())
    }
}
/// Detect the language of `text` using default hints.
///
/// This is a convenience wrapper around `detect_language_ext` that passes
/// `Default::default()` as the hints and keeps only two fields of the
/// result.
///
/// Returns the detected language (if any) together with the reliability
/// reported for that detection.
pub fn detect_language(text: &str, format: Format) ->
    (Option<Lang>, Reliability)
{
    let no_hints = Default::default();
    let detection = detect_language_ext(text, format, &no_hints);
    let best_guess = detection.language;
    let confidence = detection.reliability;
    (best_guess, confidence)
}
pub fn detect_language_ext(text: &str, format: Format, hints: &Hints)
-> DetectionResult
{
let mut language3 = [LanguageId::UNKNOWN_LANGUAGE,
LanguageId::UNKNOWN_LANGUAGE,
LanguageId::UNKNOWN_LANGUAGE];
let mut percent3: [c_int; 3] = [0, 0, 0];
let mut normalized_score3: [c_double; 3] = [0.0, 0.0, 0.0];
let mut text_bytes: c_int = 0;
let mut is_reliable: bool = false;
unsafe {
hints.with_c_rep(|hints_ptr| {
let lang = CLD2_ExtDetectLanguageSummary4(
text.as_ptr() as *const i8, text.len() as c_int,
format == Format::Text, hints_ptr, 0,
language3.as_mut_ptr(),
percent3.as_mut_ptr(),
normalized_score3.as_mut_ptr(),
null_mut(), &mut text_bytes, &mut is_reliable);
from_ffi(lang, &language3, &percent3, &normalized_score3,
text_bytes, is_reliable)
})
}
}
/// Return the raw pointer of an optional C string, or NULL when absent.
///
/// The returned pointer borrows from the `CString` behind `s`: it is only
/// valid while that `CString` is alive and unmodified.  The caller is
/// responsible for keeping the owner alive for as long as the pointer is
/// in use.
fn to_c_str_or_null(s: Option<&CString>) -> *const c_char {
    s.map_or(null(), |v| v.as_ptr())
}

/// Types that can lend out a temporary C representation of themselves.
///
/// `with_c_rep` builds the C representation `R`, passes it to `body`, and
/// returns whatever `body` returns.  The representation must not be used
/// after `body` returns.
trait WithCRep<R> {
    fn with_c_rep<T, F: FnOnce(R) -> T>(&self, body: F) -> T;
}

impl<'a> WithCRep<*const CLDHints> for Hints<'a> {
    /// Build a `CLDHints` for these hints and run `body` with a pointer to
    /// it.
    ///
    /// Missing string hints become NULL pointers, a missing language hint
    /// becomes `UNKNOWN_LANGUAGE`, and a missing encoding hint becomes
    /// `UNKNOWN_ENCODING`.
    ///
    /// # Panics
    ///
    /// Panics if `content_language` or `tld` contains an interior NUL byte.
    fn with_c_rep<T, F: FnOnce(*const CLDHints) -> T>(&self, body: F) -> T {
        // Own the NUL-terminated copies in locals that outlive the call to
        // `body`.  Taking `as_ptr()` from a temporary `CString` would hand
        // the C code a pointer to already-freed memory.
        let clang = self.content_language.map(|v| {
            CString::new(v.as_bytes())
                .expect("content_language hint contains a NUL byte")
        });
        let tld = self.tld.map(|v| {
            CString::new(v.as_bytes())
                .expect("tld hint contains a NUL byte")
        });

        let lang = self.language
            .map(|Lang(c)| LanguageIdExt::from_name(c))
            .unwrap_or(LanguageId::UNKNOWN_LANGUAGE);
        let encoding = self.encoding
            .unwrap_or(Encoding::UNKNOWN_ENCODING) as c_int;

        let hints =
            CLDHints{content_language_hint: to_c_str_or_null(clang.as_ref()),
                     tld_hint: to_c_str_or_null(tld.as_ref()),
                     encoding_hint: encoding, language_hint: lang};
        // `clang` and `tld` are dropped only after `body` has returned.
        body(&hints)
    }
}
fn from_ffi(lang: LanguageId, language3: &[LanguageId; 3],
percent3: &[c_int; 3], normalized_score3: &[c_double; 3],
text_bytes: c_int, reliable: bool) -> DetectionResult
{
let score_n = |n: usize| {
LanguageScore{language: language3[n].to_lang(),
percent: percent3[n] as u8,
normalized_score: normalized_score3[n]}
};
DetectionResult::new(lang.to_lang(),
[score_n(0), score_n(1), score_n(2)],
text_bytes,
Reliability::from_bool(reliable))
}