//---------------------------------------------------------------------------
/*
HtmlPage, HTML page class
Copyright (C) 2011 Richel Bilderbeek
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
//---------------------------------------------------------------------------
//From http://www.richelbilderbeek.nl/CppHtmlPage.htm
//---------------------------------------------------------------------------
#include <fstream>
#include <iostream>
//---------------------------------------------------------------------------
#include <boost/foreach.hpp>
#include <boost/regex.hpp>
#include <boost/algorithm/string.hpp>
//---------------------------------------------------------------------------
#include "htmlpage.h"
//---------------------------------------------------------------------------
///Constructs an HtmlPage for the HTML file called `filename` and extracts
///the page title from it.
///
///Each line of the file is inspected for an opening `<title>` tag that is
///followed, on the same line, by a closing `</title>` tag. The text between
///the first `<title>` and the last `</title>` of such a line becomes m_title,
///with the HTML entity `&amp;` decoded to `&`. Surrounding whitespace or
///other markup on the line is ignored. When several lines contain a title
///element, the last one wins; when none does, m_title is not assigned here.
///
///The file is expected to exist; this is checked by an assert only.
HtmlPage::HtmlPage(const std::string& filename)
  : m_filename(filename)
{
  assert(FileExists(filename));
  const std::string open_tag("<title>");
  const std::string close_tag("</title>");
  BOOST_FOREACH(const std::string& line, FileToVector(filename))
  {
    //Find the start of the title text: just after the first opening tag
    const std::string::size_type open_pos = line.find(open_tag);
    if (open_pos == std::string::npos) continue;
    const std::string::size_type title_begin = open_pos + open_tag.size();
    //Find the end of the title text: the last closing tag, which must
    //not precede the title text
    const std::string::size_type close_pos = line.rfind(close_tag);
    if (close_pos == std::string::npos || close_pos < title_begin) continue;
    //Extract title and decode the ampersand entity
    m_title = ReplaceAll(
      line.substr(title_begin, close_pos - title_begin),
      "&amp;",
      "&");
  }
}
//---------------------------------------------------------------------------
///FileExists reports whether the file called `filename` can be opened
///for reading; the stream used for the check is closed again on return.
///From http://www.richelbilderbeek.nl/CppFileExists.htm
bool HtmlPage::FileExists(const std::string& filename)
{
  std::fstream file(filename.c_str(), std::ios::in);
  return file.is_open();
}
//---------------------------------------------------------------------------
///FileToVector reads the file called `filename` and returns its content
///as a std::vector<std::string> with one element per line (line
///terminators '\n' are removed).
///
///The text is split on newlines: a file whose last character is a newline
///yields a trailing empty string, and an empty file yields a single empty
///string. If the file cannot be opened, an empty vector is returned.
///
///The file is expected to exist; this is checked by an assert only.
///From http://www.richelbilderbeek.nl/CppFileToVector.htm
const std::vector<std::string> HtmlPage::FileToVector(const std::string& filename)
{
  assert(FileExists(filename));
  std::vector<std::string> v;
  std::ifstream in(filename.c_str());
  std::string s;
  //Test good() instead of !eof(): a stream that failed to open (or that
  //breaks while reading) never reaches eof, which would loop forever
  while (in.good())
  {
    std::getline(in,s);
    v.push_back(s);
  }
  return v;
}
//---------------------------------------------------------------------------
///ReplaceAll returns a copy of `s` in which every occurrence of
///`replaceWhat` is replaced by `replaceWithWhat`.
///
///The string is processed in a single left-to-right pass: searching
///resumes directly after the text that was just inserted, so inserted
///text is never scanned again. This guarantees termination, also when
///`replaceWithWhat` itself contains `replaceWhat`.
///An empty `replaceWhat` matches nothing: `s` is returned unchanged.
///From http://www.richelbilderbeek.nl/CppReplaceAll.htm
const std::string HtmlPage::ReplaceAll(
  std::string s,
  const std::string& replaceWhat,
  const std::string& replaceWithWhat)
{
  //An empty pattern would be found at every position
  if (replaceWhat.empty()) return s;
  std::string::size_type pos = s.find(replaceWhat);
  while (pos != std::string::npos)
  {
    s.replace(pos,replaceWhat.size(),replaceWithWhat);
    //Skip the replacement text, to prevent matching it again
    pos = s.find(replaceWhat,pos + replaceWithWhat.size());
  }
  return s;
}
//---------------------------------------------------------------------------
///Orders two HtmlPages by their titles, ignoring case: both titles are
///converted to lower case before being compared lexicographically.
bool operator<(const HtmlPage& lhs, const HtmlPage& rhs)
{
  const std::string lhs_title = boost::algorithm::to_lower_copy(lhs.GetTitle());
  const std::string rhs_title = boost::algorithm::to_lower_copy(rhs.GetTitle());
  return lhs_title.compare(rhs_title) < 0;
}
//---------------------------------------------------------------------------
|