Gentoo Websites Logo
Go to: Gentoo Home Documentation Forums Lists Bugs Planet Store Wiki Get Gentoo!
View | Details | Raw Unified | Return to bug 197167 | Differences between
and this patch

Collapse All | Expand All

(-)tesseract-2.03-orig/training/unicharset_extractor.cpp (-16 / +24 lines)
Lines 59-61 Link Here
59
  int step = UNICHAR::utf8_step(c_string);
59
  int step = 0;
60
  if (step == 0)
60
  int len = strlen(c_string);
61
    return; // Invalid utf-8.
61
  for (int offset = 0; offset < len; offset += step) {
62
    step = UNICHAR::utf8_step(c_string + offset);
63
    if (step == 0)
64
      break; // Invalid utf-8.
Lines 63-65 Link Here
63
  // Get the next Unicode code point in the string.
66
    // Get the next Unicode code point in the string.
64
  UNICHAR ch(c_string, step);
67
    UNICHAR ch(c_string + offset, step);
65
  wc = ch.first_uni();
68
    wc = ch.first_uni();
Lines 67-73 Link Here
67
  /* Copy the properties. */
70
    /* Copy the properties. */
68
  if (iswalpha(wc)) {
71
    if (iswalpha(wc)) {
69
    unicharset->set_isalpha(id, 1);
72
      unicharset->set_isalpha(id, 1);
70
    if (iswlower(wc))
73
      if (iswlower(wc))
71
      unicharset->set_islower(id, 1);
74
        unicharset->set_islower(id, 1);
72
    if (iswupper(wc))
75
      if (iswupper(wc))
73
      unicharset->set_isupper(id, 1);
76
        unicharset->set_isupper(id, 1);
77
    }
78
    if (iswdigit(wc))
79
      unicharset->set_isdigit(id, 1);
Lines 75-76 Link Here
75
  if (iswdigit(wc))
76
    unicharset->set_isdigit(id, 1);
Line 79 Link Here
84
enum {
85
        kBufSize = 4096 //default buffer size
86
};
87
Line 122 Link Here
122
    char c_string[kBoxReadBufSize];
130
    char c_string[kBufSize];

Return to bug 197167