Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace strcpy and strncpy by new inline helper function #4250

Merged
merged 1 commit into from
May 24, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 2 additions & 4 deletions src/api/altorenderer.cpp
Original file line number Diff line number Diff line change
@@ -14,6 +14,7 @@
// limitations under the License.

#include "errcode.h" // for ASSERT_HOST
#include "helpers.h" // for copy_string
#ifdef _WIN32
# include "host.h" // windows.h for MultiByteToWideChar, ...
#endif
@@ -270,12 +271,9 @@ char *TessBaseAPI::GetAltoText(ETEXT_DESC *monitor, int page_number) {

alto_str << "\t\t\t</PrintSpace>\n"
<< "\t\t</Page>\n";
const std::string &text = alto_str.str();

char *result = new char[text.length() + 1];
strcpy(result, text.c_str());
delete res_it;
return result;
return copy_string(alto_str.str());
}

} // namespace tesseract
15 changes: 4 additions & 11 deletions src/api/baseapi.cpp
Original file line number Diff line number Diff line change
@@ -33,7 +33,7 @@
#include "equationdetect.h" // for EquationDetect, destructor of equ_detect_
#endif // ndef DISABLED_LEGACY_ENGINE
#include "errcode.h" // for ASSERT_HOST
#include "helpers.h" // for IntCastRounded, chomp_string
#include "helpers.h" // for IntCastRounded, chomp_string, copy_string
#include "host.h" // for MAX_PATH
#include "imageio.h" // for IFF_TIFF_G4, IFF_TIFF, IFF_TIFF_G3, ...
#ifndef DISABLED_LEGACY_ENGINE
@@ -1378,9 +1378,7 @@ char *TessBaseAPI::GetUTF8Text() {
const std::unique_ptr<const char[]> para_text(it->GetUTF8Text(RIL_PARA));
text += para_text.get();
} while (it->Next(RIL_PARA));
char *result = new char[text.length() + 1];
strncpy(result, text.c_str(), text.length() + 1);
return result;
return copy_string(text);
}

static void AddBoxToTSV(const PageIterator *it, PageIteratorLevel level, std::string &text) {
@@ -1509,9 +1507,7 @@ char *TessBaseAPI::GetTSVText(int page_number) {
#endif
}

char *ret = new char[tsv_str.length() + 1];
strcpy(ret, tsv_str.c_str());
return ret;
return copy_string(tsv_str);
}

/** The 5 numbers output for each box (the usual 4 and a page number.) */
@@ -1759,10 +1755,7 @@ char *TessBaseAPI::GetOsdText(int page_number) {
<< "Orientation confidence: " << orient_conf << "\n"
<< "Script: " << script_name << "\n"
<< "Script confidence: " << script_conf << "\n";
const std::string &text = stream.str();
char *result = new char[text.length() + 1];
strcpy(result, text.c_str());
return result;
return copy_string(stream.str());
}

#endif // ndef DISABLED_LEGACY_ENGINE
6 changes: 2 additions & 4 deletions src/api/hocrrenderer.cpp
Original file line number Diff line number Diff line change
@@ -25,6 +25,7 @@
# include "host.h" // windows.h for MultiByteToWideChar, ...
#endif
#include <tesseract/renderer.h>
#include "helpers.h" // for copy_string
#include "tesseractclass.h" // for Tesseract

namespace tesseract {
@@ -480,10 +481,7 @@ char *TessBaseAPI::GetHOCRText(ETEXT_DESC *monitor, int page_number) {
}
hocr_str << " </div>\n";

const std::string &text = hocr_str.str();
char *result = new char[text.length() + 1];
strcpy(result, text.c_str());
return result;
return copy_string(hocr_str.str());
}

/**********************************************************************
5 changes: 2 additions & 3 deletions src/api/lstmboxrenderer.cpp
Original file line number Diff line number Diff line change
@@ -18,6 +18,7 @@

#include <tesseract/baseapi.h> // for TessBaseAPI
#include <tesseract/renderer.h>
#include "helpers.h" // for copy_string
#include "tesseractclass.h" // for Tesseract

namespace tesseract {
@@ -81,10 +82,8 @@ char *TessBaseAPI::GetLSTMBoxText(int page_number = 0) {
AddBoxToLSTM(right, bottom, top, image_height_, page_number, lstm_box_str);
lstm_box_str += "\n"; // end of PAGE
}
char *ret = new char[lstm_box_str.length() + 1];
strcpy(ret, lstm_box_str.c_str());
delete res_it;
return ret;
return copy_string(lstm_box_str);
}

/**********************************************************************
10 changes: 2 additions & 8 deletions src/api/pagerenderer.cpp
Original file line number Diff line number Diff line change
@@ -14,6 +14,7 @@
// limitations under the License.

#include "errcode.h" // for ASSERT_HOST
#include "helpers.h" // for copy_string
#ifdef _WIN32
# include "host.h" // windows.h for MultiByteToWideChar, ...
#endif
@@ -1143,15 +1144,8 @@ char *TessBaseAPI::GetPAGEText(ETEXT_DESC *monitor, int page_number) {
const std::string &text = reading_order_str.str();
reading_order_str.str("");

// Allocate memory for result to hold text.length() characters plus a null
// terminator. Safely copy the string into result, ensuring no overflow.
// strncpy does not necessarily null-terminate the destination, so do it manually.
char *result = new char[text.length() + 1];
strncpy(result, text.c_str(), text.length());
result[text.length()] = '\0';

delete res_it;
return result;
return copy_string(text);
}

} // namespace tesseract
7 changes: 2 additions & 5 deletions src/api/pdfrenderer.cpp
Original file line number Diff line number Diff line change
@@ -22,7 +22,7 @@

#include "pdf_ttf.h"
#include "tprintf.h"
#include "helpers.h" // for Swap
#include "helpers.h" // for Swap, copy_string

#include <allheaders.h>
#include <tesseract/baseapi.h>
@@ -497,10 +497,7 @@ char *TessPDFRenderer::GetPDFTextObjects(TessBaseAPI *api, double width, double
pdf_str << "ET\n"; // end the text object
}
}
const std::string &text = pdf_str.str();
char *result = new char[text.length() + 1];
strcpy(result, text.c_str());
return result;
return copy_string(pdf_str.str());
}

bool TessPDFRenderer::BeginDocumentHandler() {
5 changes: 2 additions & 3 deletions src/api/wordstrboxrenderer.cpp
Original file line number Diff line number Diff line change
@@ -18,6 +18,7 @@

#include <tesseract/baseapi.h> // for TessBaseAPI
#include <tesseract/renderer.h>
#include "helpers.h" // for copy_string
#include "tesseractclass.h" // for Tesseract

namespace tesseract {
@@ -80,10 +81,8 @@ char *TessBaseAPI::GetWordStrBoxText(int page_number = 0) {
wordstr_box_str += " " + std::to_string(page_number); // row for tab for EOL
wordstr_box_str += "\n";
}
char *ret = new char[wordstr_box_str.length() + 1];
strcpy(ret, wordstr_box_str.c_str());
delete res_it;
return ret;
return copy_string(wordstr_box_str);
}

/**********************************************************************
17 changes: 4 additions & 13 deletions src/ccmain/ltrresultiterator.cpp
Original file line number Diff line number Diff line change
@@ -19,6 +19,7 @@

#include <tesseract/ltrresultiterator.h>

#include "helpers.h" // for copy_string
#include "pageres.h"
#include "tesseractclass.h"

@@ -76,10 +77,7 @@ char *LTRResultIterator::GetUTF8Text(PageIteratorLevel level) const {
}
} while (level == RIL_BLOCK && res_it.block() == res_it.prev_block());
}
int length = text.length() + 1;
char *result = new char[length];
strncpy(result, text.c_str(), length);
return result;
return copy_string(text);
}

// Set the string inserted at the end of each text line. "\n" by default.
@@ -310,11 +308,7 @@ char *LTRResultIterator::WordTruthUTF8Text() const {
if (!HasTruthString()) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What if we change this function return type to std::string?

char *LTRResultIterator::WordTruthUTF8Text() const {

to

std::string LTRResultIterator::WordTruthUTF8Text() const {

and also all calling sites?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Even better, WordTruthUTF8Text should be removed because it is not used anywhere. But that can be done in a separate commit.

That function is part of the public API, so I think we have to keep it as it is.

return nullptr;
}
std::string truth_text = it_->word()->blamer_bundle->TruthString();
int length = truth_text.length() + 1;
char *result = new char[length];
strncpy(result, truth_text.c_str(), length);
return result;
return copy_string(it_->word()->blamer_bundle->TruthString());
}

// Returns the null terminated UTF-8 encoded normalized OCR string for the
@@ -330,10 +324,7 @@ char *LTRResultIterator::WordNormedUTF8Text() const {
for (unsigned i = 0; i < best_choice->length(); ++i) {
ocr_text += unicharset->get_normed_unichar(best_choice->unichar_id(i));
}
auto length = ocr_text.length() + 1;
char *result = new char[length];
strncpy(result, ocr_text.c_str(), length);
return result;
return copy_string(ocr_text);
}

// Returns a pointer to serialized choice lattice.
6 changes: 2 additions & 4 deletions src/ccmain/resultiterator.cpp
Original file line number Diff line number Diff line change
@@ -20,6 +20,7 @@

#include <tesseract/resultiterator.h>

#include "helpers.h" // for copy_string
#include "pageres.h"
#include "tesseractclass.h"
#include "unicharset.h"
@@ -681,10 +682,7 @@ char *ResultIterator::GetUTF8Text(PageIteratorLevel level) const {
}
} break;
}
int length = text.length() + 1;
char *result = new char[length];
strncpy(result, text.c_str(), length);
return result;
return copy_string(text);
}
std::vector<std::vector<std::vector<std::pair<const char *, float>>>>
*ResultIterator::GetRawLSTMTimesteps() const {
11 changes: 11 additions & 0 deletions src/ccutil/helpers.h
Original file line number Diff line number Diff line change
@@ -35,6 +35,17 @@

namespace tesseract {

// Copy a std::string to a newly allocated char *.
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// Copy a std::string to a newly allocated char *.
// Copy a std::string to a newly allocated char *.
// TODO: Remove this function once the related code has been converted to use std::string.

// Duplicate the contents of a std::string into a freshly allocated,
// NUL-terminated char array.
//
// The returned buffer is created with new[] and holds from.size() + 1 bytes;
// ownership passes to the caller, who must release it with delete[].
// Embedded '\0' bytes in the input are copied verbatim.
//
// TODO: Remove this function once the related code has been converted
// to use std::string.
inline char *copy_string(const std::string &from) {
  char *buffer = new char[from.size() + 1];
  // std::copy returns the position one past the last written character,
  // which is exactly where the terminator belongs.
  char *terminator = std::copy(from.begin(), from.end(), buffer);
  *terminator = '\0';
  return buffer;
}

template <class T>
inline bool contains(const std::vector<T> &data, const T &value) {
return std::find(data.begin(), data.end(), value) != data.end();