C++ “Segmentation fault” or “free(): invalid pointer” depending on input

This program gives a “Segmentation fault” or a “free(): invalid pointer” based on the input file used. The bug is reproductible.

This is very strange, especially because free is not called.

Here is the complete program.

// Open a file, read line by line,
// and for each (loooong) line, fraction it into
// small, justified lines.
// Justification done by adding spaces to existing spaces.

#include <iostream>
#include <iterator>
#include <sstream>
#include <fstream>
#include <list>
#include <vector>
#include <math.h>
#include <random>
#include <functional>

const int pageWidth = 50; // max width for a (justified) line

typedef std::vector<std::string> WordList;
typedef std::vector<int> SpaceList;

// ========
// HELPERS
// ========

// Helper to return "cccccc...ccc"
std::string repeat (const int n, char c) {
    std::string ret;
    for (int i{0}; i < n; ++i) {
        ret += c;
    }
    return ret;
}

// "Random" int between min and max (pseudo-random: reproductible)
unsigned int random_pred(std::size_t salt, unsigned int min, unsigned int max) {
    unsigned int output = min + (salt % static_cast<int>(max - min + 1));
    return output;
}

// alpha is greater at center
float alpha_weight(float z) { return std::max(100*(sin(z))*(sin(z)), float(1)); }

// Weight of a space, ie probability to add here blank space
int weight(int x, unsigned int l)
{
    float z = 3.141 * x / l;
    return alpha_weight(z);
}

// line -> vector of words
WordList splitTextIntoWords( const std::string &text )
{
    WordList words;
    std::istringstream in(text);
    std::copy(std::istream_iterator<std::string>(in),
              std::istream_iterator<std::string>(),
              std::back_inserter(words));
    return words;
}

// ======
// CORE
// ======

// Give each space a weight, a 'probability' to be expanded
SpaceList charge_spaces ( int l, const WordList & words, SpaceList spp) 
{
    SpaceList sp_weights;
    std::string p{ words[0] }, h;
    int wg;
    for (size_t i = 0; i < words.size()-1; ++i) {
            wg = weight(spp[i], l, words[i], words[i+1]);
            sp_weights.push_back(wg);
    }
    return sp_weights;
}

// Given weighted list of spaces positions, 'randomly' pick one
int random_sp( const SpaceList& spw, std::size_t salt ) {
    std::string m;
    unsigned int i{48}, total{0}; // ASCII 48 = ' '
    for (const int & n : spw) {
        char ch = static_cast<char>(i); // '1'; '2'; '3' ...
        std::string segment = repeat(n, ch); // "11"; "2222"; "3333333333333" ....
        m += segment;
        total += n;
        ++i;
    }  // now, m like "11112222222222333333333333333333334444444455555", of length <total>
    int mqrh = random_pred(salt, 0, total); // Get 0 <= random <= total
    char iss = m[mqrh]; // Read the char at this position (for example, more likely '3' here)
    int ret = static_cast<int>(iss) - 48; // Example: '3' -> 3
    return ret; // Example: return 3
}

// Add spaces to a (small) line, to justify it.
// We need to expand it by <excess> spaces.
std::string justifyLine( std::string line, WordList ww, SpaceList space_positions, int excess )
{
    SpaceList spwg = charge_spaces(line.size(), ww, space_positions);
    SpaceList spadd{spwg.begin(), spwg.end()}; // number of spaces to add after word <i>
    for (size_t k = 0; k < ww.size()-1; ++k) {
        spadd[k] = 1; // By default, 1 space after each word
    }
    int winner; // Which space will win additional space ?
    std::size_t str_hash = std::hash<std::string>{}(line) / 1000; // 'random' seed, reproductible
    for (int i{0}; i < excess; ++i) { // Where to add these <excess> needed spaces ?
        std::size_t salt = str_hash + 37*i;
        winner = random_sp(spwg, salt);
        spadd[winner] = spadd[winner] + 1; // space after <winner> word is incremented !
        spwg[winner] = std::max( spwg[winner] / 10, 1); // Weight of winner is decreased
    }
    // Build up the justified line
    std::string justified;
    for (size_t j = 0; j < ww.size()-1; ++j) {
        justified.append(ww[j]); // Add next word
        justified.append(spadd[j], ' '); // Add few spaces
    }
    justified.append(ww.back()); // Add last word
    std::cout << justified << std::endl;
    return justified;
}

// Fraction a long line in several justified small lines
void justifyText( const std::string& text )
{
    WordList words = splitTextIntoWords(text);
    std::string line;
    WordList ww;
    SpaceList space_positions;
    int position{0};
    int nwords_in_line{0};
    for (const std::string& word : words) {
        size_t s = word.size();
        if (line.size() + s + 1 <= pageWidth) { // next word fit into the line.
            if (!line.empty()) {
                line.append(" ");
                space_positions.push_back(position++);
            }
            line.append(word);
            nwords_in_line++;
            ww.push_back(word); // append this word to the list
            position += s;
        } else { // build a justified small line from the words added up
            justifyLine(line, ww, space_positions, pageWidth - position);
            line.clear(); // Cleaning for next chunk
            ww.clear();
            space_positions.clear();
            line = word;
            position = s;
            nwords_in_line = 1;
            ww.push_back(word); // don't forget the last word (that overflowed)
        }
    }
    std::cout << line << std::endl; // Remaining of the long line
}

// =====
// main
// =====
int main () {
    std::string line;
    std::ifstream myfile ("candle.txt");
    if (myfile.is_open())
    {
        while ( getline(myfile,line) )
        {
            justifyText(line);
        }
        myfile.close();
    }
    else std::cerr << "Unable to open file";
    return 0;
}

File “candle.txt” is an ASCII text file, here is a copy.

The whole file gives free(): invalid pointeralways at the same position — see below (1)

If cutting between the two markups in the PREFACE (deleting the chunk between the two CUT HEREmarks), program gives a Segmentation fault.

Running with Valgrind gives this (very strange because repeat function does not seem problematic)

Thread 1: status = VgTs_Runnable (lwpid 4487)
==4487==    at 0x4838DEF: operator new(unsigned long) (vg_replace_malloc.c:342)
==4487==    by 0x4990859: std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::_M_mutate(unsigned long, unsigned long, char const*, unsigned long) (in /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.28)
==4487==    by 0x4990F34: std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::operator+=(char) (in /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.28)
==4487==    by 0x10A406: repeat[abi:cxx11](int, char) (in /home/fig/Documents/cpp/cil)
==4487==    by 0x10A8DD: random_sp(std::vector<int, std::allocator<int> > const&, unsigned long) (in /home/fig/Documents/cpp/cil)
==4487==    by 0x10AB34: justifyLine(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::vector<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::allocator<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > > >, std::vector<int, std::allocator<int> >, int) (in /home/fig/Documents/cpp/cil)
==4487==    by 0x10AF71: justifyText(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&) (in /home/fig/Documents/cpp/cil)
==4487==    by 0x10B195: main (in /home/xxx/Documents/cpp/cil)

Any idea welcome

(1) End of output:

We have several tests for oxygen  besides the mere
burning of bodies. You have seen a candle burnt in
oxygen, or in the air;   you have  seen phosphorus
burnt in the air, or in oxygen; and  you have seen
iron-filings  burnt in  oxygen. But we  have other
tests besides  these, and  I am about  to refer to
one or two  of them for  he purpose of  carrying

Leave a Comment