presage 0.9.1
databaseConnector.cpp
Go to the documentation of this file.
1
2/******************************************************
3 * Presage, an extensible predictive text entry system
4 * ---------------------------------------------------
5 *
6 * Copyright (C) 2008 Matteo Vescovi <matteo.vescovi@yahoo.co.uk>
7
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2 of the License, or
11 (at your option) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License along
19 with this program; if not, write to the Free Software Foundation, Inc.,
20 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 *
22 **********(*)*/
23
24
25#include "databaseConnector.h"
26
27#include "../../core/utility.h"
28
29#include <list>
30#include <sstream>
31#include <stdlib.h>
32#include <assert.h>
33
34DatabaseConnector::DatabaseConnector(const std::string database_name,
35 const size_t cardinality,
36 const bool read_write)
37 : logger("DatabaseConnector", std::cerr)
38{
39 set_database_filename (database_name);
41 set_read_write_mode (read_write);
42}
43
44DatabaseConnector::DatabaseConnector(const std::string database_name,
45 const size_t cardinality,
46 const bool read_write,
47 const std::string& log_level)
48 : logger("DatabaseConnector", std::cerr, log_level)
49{
50 set_database_filename (database_name);
52 set_read_write_mode (read_write);
53}
54
57
58void DatabaseConnector::createNgramTable(const size_t n) const
59{
60 if (n > 0) {
61 std::stringstream query;
62 std::stringstream unique;
63 query << "CREATE TABLE";
64// This #ifdef does not belong here, but unfortunately SQLite 2.x does
65// not support the IF NOT EXISTS SQL clause.
66#ifndef HAVE_SQLITE_H
67 query << " IF NOT EXISTS";
68#endif
69 query << " _" << n << "_gram (";
70 for (int i = n - 1; i >= 0; i--) {
71 if (i != 0) {
72 unique << "word_" << i << ", ";
73 query << "word_" << i << " TEXT, ";
74 } else {
75 unique << "word";
76 query << "word TEXT, count INTEGER, UNIQUE(" << unique.str() << ") );";
77 }
78 }
79
80 executeSql(query.str());
81 } else {
82 // TODO
83 // throw exception
84 }
85}
86
88{
89 std::string query = "SELECT SUM(count) FROM _1_gram;";
90
91 NgramTable result = executeSql(query);
92
93 logger << DEBUG << "NgramTable:";
94 for (size_t i = 0; i < result.size(); i++) {
95 for (size_t j = 0; j < result[i].size(); j++) {
96 logger << DEBUG << result[i][j] << '\t';
97 }
98 logger << DEBUG << endl;
99 }
100
101 return extractFirstInteger(result);
102}
103
105{
106 std::stringstream query;
107 query << "SELECT count "
108 << "FROM _" << ngram.size() << "_gram"
109 << buildWhereClause(ngram) << ";";
110
111 NgramTable result = executeSql(query.str());
112
113 logger << DEBUG << "NgramTable:";
114 for (size_t i = 0; i < result.size(); i++) {
115 for (size_t j = 0; j < result[i].size(); j++) {
116 logger << DEBUG << result[i][j] << '\t';
117 }
118 logger << DEBUG << endl;
119 }
120
121 return extractFirstInteger(result);
122}
123
125{
126 std::stringstream query;
127 query << "SELECT " << buildSelectLikeClause(ngram.size()) << " "
128 << "FROM _" << ngram.size() << "_gram"
129 << buildWhereLikeClause(ngram)
130 << " ORDER BY count DESC";
131 if (limit < 0) {
132 query << ";";
133 } else {
134 query << " LIMIT " << limit << ';';
135 }
136
137 return executeSql(query.str());
138}
139
140NgramTable DatabaseConnector::getNgramLikeTableFiltered(const Ngram ngram, const char** filter, int limit) const
141{
142 std::stringstream query;
143 query << "SELECT " << buildSelectLikeClause(ngram.size()) << " "
144 << "FROM _" << ngram.size() << "_gram"
145 << buildWhereLikeClauseFiltered(ngram,filter)
146 << " ORDER BY count DESC";
147 if (limit < 0) {
148 query << ";";
149 } else {
150 query << " LIMIT " << limit << ';';
151 }
152
153 return executeSql(query.str());
154}
155
157{
158 int count = getNgramCount(ngram);
159
160 if (count > 0) {
161 // the ngram was found in the database
162 updateNgram(ngram, ++count);
163
164 logger << DEBUG << "Updated ngram to " << count << endl;
165
166 } else {
167 // the ngram was not found in the database
168 count = 1;
169 insertNgram(ngram, count);
170
171 logger << DEBUG << "Inserted ngram" << endl;
172
173 }
174 return count;
175}
176
178{}
179
180void DatabaseConnector::insertNgram(const Ngram ngram, const int count) const
181{
182 std::stringstream query;
183
184 query << "INSERT INTO _" << ngram.size() << "_gram "
185 << buildValuesClause(ngram, count)
186 << ";";
187
188 executeSql(query.str());
189}
190
191void DatabaseConnector::updateNgram(const Ngram ngram, const int count) const
192{
193 std::stringstream query;
194
195 query << "UPDATE _" << ngram.size() << "_gram "
196 << "SET count = " << count
197 << buildWhereClause(ngram) << ";";
198
199 executeSql(query.str());
200}
201
202std::string DatabaseConnector::buildWhereClause(const Ngram ngram) const
203{
204 std::stringstream where_clause;
205 where_clause << " WHERE";
206 for (size_t i = 0; i < ngram.size(); i++) {
207 if (i < ngram.size() - 1) {
208 where_clause << " word_" << ngram.size() - i - 1 << " = '"
209 << sanitizeString(ngram[i]) << "' AND";
210 } else {
211 where_clause << " word = '" << sanitizeString(ngram[ngram.size() - 1]) << "'";
212 }
213 }
214 return where_clause.str();
215}
216
217// TODO REVISIT refactor: this is same as buildWhereClause, except for
218// "word = " instead of "word LIKE "
219std::string DatabaseConnector::buildWhereLikeClause(const Ngram ngram) const
220{
221 std::stringstream where_clause;
222 where_clause << " WHERE";
223 for (size_t i = 0; i < ngram.size(); i++) {
224 if (i < ngram.size() - 1) {
225 where_clause << " word_" << ngram.size() - i - 1 << " = '"
226 << sanitizeString(ngram[i]) << "' AND";
227 } else {
228 where_clause << " word LIKE '" << sanitizeString(ngram[ngram.size() - 1]) << "%'";
229 }
230 }
231 return where_clause.str();
232}
233
234std::string DatabaseConnector::buildWhereLikeClauseFiltered(const Ngram ngram, const char** filter) const
235{
236 std::stringstream where_clause;
237 where_clause << " WHERE";
238 for (size_t i = 0; i < ngram.size(); i++) {
239 if (i < ngram.size() - 1) {
240 where_clause << " word_" << ngram.size() - i - 1 << " = '"
241 << sanitizeString(ngram[i]) << "' AND";
242 } else {
243 if(filter == 0)
244 where_clause << " word LIKE '" << sanitizeString(ngram[ngram.size() - 1]) << "%'";
245 else {
246 std::string true_prefix = sanitizeString(ngram[ngram.size() - 1]);
247 where_clause << " (";
248 for (int j = 0; filter[j] != 0; j++) {
249// for(size_t j=0; j < filter.size()-1; j++)
250 if (j) {
251 where_clause << " OR ";
252 }
253 where_clause << " word LIKE '" << true_prefix << filter[j] << "%'";
254 }
255// where_clause << " word LIKE '" << true_prefix <<"%' )";
256 where_clause << ')';
257 }
258 }
259 }
260 return where_clause.str();
261}
262
263
264std::string DatabaseConnector::buildSelectLikeClause(const int cardinality) const
265{
266 assert(cardinality > 0);
267
268 std::stringstream result;
269 for (int i = cardinality - 1; i >= 0; i--) {
270 if (i != 0) {
271 result << "word_" << i << ", ";
272 } else {
273 result << "word, count";
274 }
275 }
276
277 return result.str();
278}
279
280std::string DatabaseConnector::buildValuesClause(const Ngram ngram, const int count) const
281{
282 std::stringstream values_clause;
283 values_clause << "VALUES(";
284 for (size_t i = 0; i < ngram.size(); i++) {
285 if (i < ngram.size() - 1) {
286 values_clause << "'" << sanitizeString(ngram[i]) << "', ";
287 } else {
288 values_clause << "'" << sanitizeString(ngram[i]) << "', " << count << ")";
289 }
290 }
291 return values_clause.str();
292}
293
294std::string DatabaseConnector::sanitizeString(const std::string str) const
295{
296 std::string sanitized = str;
297 const std::string search = "'";
298 const std::string replace = "''";
299
300 // Escape single quotes
301 size_t pos = 0;
302 while ((pos = sanitized.find(search, pos)) != std::string::npos) {
303 sanitized.replace(pos, search.length(), replace);
304 pos += replace.length();
305 }
306 return sanitized;
307}
308
310{
311 // Initialize count to zero and then check that we have at least
312 // an entry in the table of ngram counts returned by the
313 // executeSql() method. If so, convert it into an integer and
314 // return it.
315 //
316 // REVISIT: make conversion to integer more robust (strtol ??)
317 //
318 int count = 0;
319 if (table.size() > 0) {
320 if (table[0].size() > 0) {
321 count = atoi(table[0][0].c_str());
322 }
323 }
324
325 logger << DEBUG << "table: ";
326 for (size_t i = 0; i < table.size(); i++) {
327 for (size_t j = 0; j < table[i].size(); j++) {
328 logger << DEBUG << table[i][j] << '\t';
329 }
330 logger << DEBUG << endl;
331 }
332
333 return (count > 0 ? count : 0);
334}
335
337{
338 executeSql("BEGIN TRANSACTION;");
339}
340
342{
343 executeSql("END TRANSACTION;");
344}
345
347{
348 executeSql("ROLLBACK TRANSACTION;");
349}
350
352{
353 return database_filename;
354}
355
356std::string DatabaseConnector::set_database_filename (const std::string& filename)
357{
358 std::string prev_filename = database_filename;
359
361
362 // make an attempt at determining whether directory where language
363 // model database is located exists and try to create it if it
364 // does not... only cater for one directory level to create it.
365 //
366 std::string dir = Utility::dirname (database_filename);
367 if (! dir.empty()) {
368 // check that specified directory exists and accessible
369 if (! Utility::is_directory_usable (dir)) {
370 // create it if not
372 }
373 }
374
375 return prev_filename;
376}
377
378std::string DatabaseConnector::expand_variables (std::string filepath) const
379{
380 // scan the filepath for variables, which follow the same pattern
381 // as shell variables - strings enclosed in '${' and '}'
382 //
383 const std::string start_marker = "${";
384 const std::string end_marker = "}";
385
386 std::list<std::string> variables;
387
388 std::string::size_type pos_start = filepath.find (start_marker);
389 while (pos_start != std::string::npos)
390 {
391 std::string::size_type pos_end = filepath.find (end_marker, pos_start);
392 if (pos_end != std::string::npos) {
393 variables.push_back (filepath.substr(pos_start + start_marker.size(), pos_end - end_marker.size() - pos_start - 1));
394 }
395
396 pos_start = filepath.find (start_marker, pos_end);
397 }
398
399 for (std::list<std::string>::const_iterator it = variables.begin();
400 it != variables.end();
401 it++)
402 {
403 substitute_variable_in_string(*it, filepath);
404 }
405
406 return filepath;
407}
408
409void DatabaseConnector::substitute_variable_in_string (const std::string& variable_name, std::string& filepath) const
410{
411 std::string variable_token = "${" + variable_name + "}";
412
413 for (std::string::size_type pos = filepath.find (variable_token);
414 pos != std::string::npos;
415 pos = filepath.find (variable_token, pos))
416 {
417 const char* value = getenv(variable_name.c_str());
418 if (value)
419 {
420 filepath.replace (pos,
421 variable_token.size(),
422 value);
423 }
424 else
425 {
426 // handle "special" variables
427 if (variable_name == "HOME")
428 {
429 value = getenv("USERPROFILE");
430 if (value)
431 {
432 filepath.replace (pos,
433 variable_token.size(),
434 value);
435 }
436 }
437 else
438 {
439 // FIXME: maybe throw exception instead of leaving
440 // variable name in string?
441 //
442 filepath.replace (pos,
443 variable_token.size(),
444 variable_name);
445 }
446 }
447 }
448}
449
451{
452 cardinality = card;
453}
454
456{
457 return cardinality;
458}
459
460void DatabaseConnector::set_read_write_mode (const bool read_write)
461{
462 read_write_mode = read_write;
463}
464
466{
467 return read_write_mode;
468}
void set_read_write_mode(const bool read_write)
virtual void endTransaction() const
void createNgramTable(const size_t cardinality) const
virtual NgramTable executeSql(const std::string query) const =0
std::string buildWhereLikeClauseFiltered(const Ngram ngram, const char **filter) const
virtual void beginTransaction() const
void substitute_variable_in_string(const std::string &variable_name, std::string &filepath) const
void removeNgram(const Ngram ngram) const
virtual void rollbackTransaction() const
NgramTable getNgramLikeTable(const Ngram ngram, int limit=-1) const
NgramTable getNgramLikeTableFiltered(const Ngram ngram, const char **filter, int limit=-1) const
std::string buildValuesClause(const Ngram ngram, const int count) const
int incrementNgramCount(const Ngram ngram) const
size_t get_cardinality() const
void insertNgram(const Ngram ngram, const int count) const
std::string buildWhereLikeClause(const Ngram ngram) const
int extractFirstInteger(const NgramTable &) const
std::string database_filename
std::string buildSelectLikeClause(const int cardinality) const
std::string buildWhereClause(const Ngram ngram) const
std::string set_database_filename(const std::string &filename)
int getUnigramCountsSum() const
int getNgramCount(const Ngram ngram) const
void updateNgram(const Ngram ngram, const int count) const
std::string sanitizeString(const std::string) const
std::string expand_variables(std::string filename) const
std::string get_database_filename() const
bool get_read_write_mode() const
void set_cardinality(const size_t cardinality)
DatabaseConnector(const std::string database_name, const size_t cardinality, const bool read_write)
Logger< char > logger
Definition ngram.h:33
static void create_directory(const std::string &dir)
Definition utility.cpp:330
static std::string dirname(const std::string &)
Definition utility.cpp:275
static bool is_directory_usable(const std::string &dir)
Definition utility.cpp:307
std::vector< Ngram > NgramTable
const Logger< _charT, _Traits > & endl(const Logger< _charT, _Traits > &lgr)
Definition logger.h:278