Окончательная программа
Ниже представлен полный текст программы, разработанной в этой главе, с двумя модификациями: мы инкапсулировали все структуры данных и функции в класс TextQuery (в последующих главах мы обсудим подобное использование классов), кроме того, текст был изменен, так как наш компилятор поддерживал стандарт C++ не полностью.
Например, библиотека iostream не соответствовала текущему стандарту. Шаблоны не поддерживали значения аргументов по умолчанию. Возможно, вам придется изменить кое-что в этой программе, чтобы она компилировалась в вашей системе.
// standard C++ header files
#include <algorithm>
#include <string>
#include <vector>
#include <utility>
#include <map>
#include <set>
// pre-standard iostream header file
#include <fstream.h>
// C header files
#include <stddef.h>
#include <ctype.h>
#include <stdlib.h>
// typedefs for readability
// location: (line index, word index within that line), both stored as short
typedef pair<short,short> location;
// loc: a sequence of locations
typedef vector<location,allocator> loc;
// text: a sequence of strings
typedef vector<string,allocator> text;
// text_loc: a pointer to a text paired with a pointer to a loc
typedef pair<text*,loc*> text_loc;
class TextQuery {
public:
TextQuery() { memset( this, 0, sizeof( TextQuery )); }
static void
filter_elements( string felems ) { filt_elems = felems; }
void query_text();
void display_map_text();
void display_text_locations();
void doit() {
retrieve_text();
separate_words();
filter_text();
suffix_text();
strip_caps();
build_word_map();
}
private:
void retrieve_text();
void separate_words():
void filter_text();
void strip_caps();
void suffix_textQ;
void suffix_s( string& );
void build_word_map();
private:
vector<string,allocator> *lines_of_text;
text_loc *text_locations;
map< string,loc*,
less<string>,allocator> *word_map;
ààà static stringàààààààààààààààà filt_elems;
};
// Default characters removed from every word by filter_text():
// double quote, comma, period, semicolon, colon, '!', '?', ')', '(',
// backslash and slash. The backslash is written as an escaped pair; the
// previous text contained an invalid escape sequence and a mis-encoded
// character in this literal.
string TextQuery::filt_elems( "\",.;:!?)(\\/" );
int main()
{
ààà TextQuery tq;
ààà tq.doit();
ààà tq.query_text();
ààà tq.display_map_text();
}
void
TextQuery::
retrieve_text()
{
ààà string file_name;
ààà cout << " please enter file name: ";
ààà cin >> file_name;
ààà ifstream infile( file_name.c_str(), ios::in );
ààà if ( !infile ) {
ààààààà cerr << "oops' unable to open file "
àààààààààààà << file_name <<ààà " -- bailing out!\n";
àààààà àexit( -1 );
ààà }
ààà else cout << "\n";
ààà lines_of_text = new vector<string,allocator>;
ààà string textline;
ààà while ( getline( infile, textline, '\n' ))
ààààààà lines_of_text->push_back( textline );
}
void
TextQuery::
separate_words()
{
ààà vector<string,allocator> *words =
àààààààààààààà new vector<string,allocator>;
ààà vector<location,allocator> *locations =
àààààààààààààà new vector<location,allocator>;
ààà for ( short line_pos = 0; line_pos < lines_of_text->size();
ààààààà line_pos++ )
ààà {
ààààààà short word_pos = 0;
ààààààà string textline = (*lines_of_text)[ line_pos ];
ààààààà string::size_type eol = textline.1ength();
ààààààà string::size_type pos = 0, prev_pos = 0;
ààààààà while (( pos = textline.find_first_of( ' ', pos ))
ààààààààà àààààààààà!= string::npos )
ààààààà {
ààààààààààà words->push_back(
ààààààààààààààà textline.substr( prev_pos, pos - prev_pos ));
ààààààààààà locations->push_back(
ààààààààààààààà make_pair( line_pos, word_pos ));
ààààààààààà word_pos++; pos++; prev_pos = pos;
ààààààà }
ààààààà words->push_back(
ààààààààààà textline.substr( prev_pos, pos - prev_pos ));
ààààààà locations->push_back(make_pair(line_pos,word_pos));
ààà }
ààà text_locations = new text_loc( words, locations );
}
void
TextQuery::
filter_text()
{
ààà if ( filt_elems.empty() )
ààààààà return;
ààà vector<string,allocator> *words = text_locations->first;
ààà vector<string,allocator>:: iterator iter = words->begin();
ààà vector<string,allocator>::iterator iter_end = words->end();
ààà while ( iter != iter_end )
ààà {
ààààààà string::size_type pos = 0;
ààààààà while ((pos = (*iter).find_first_of(filt_elems, pos))
àààààààààààààààààà != string::npos )
ààààààààààà (*iter).erase(pos,l);
ààààààà ++iter;
ààà }
}
void
TextQuery::
suffix_text()
{
ààà vector<string,allocator> *words = text_locations->first;
ààà vector<string,allocator>::iterator iter = words->begin();
ààà vector<string,allocator>::iterator iter_end = words->end() ;
ààà while ( iter != iter_end ) {
ààààààà if ( (*iter).size() <= 3 )
àààààààà ààà{ iter++; continue; }
ààààààà if ( (*iter)[ (*iter).size()-l ] == 's' )
ààààààààààà suffix_s( *iter );
ààààààà // ôþÿþûýøªõû¹ýð þñ¨ðñþªú𠸺¯¯øú¸þò...
ààààààà iter++;
ààà }
}
// Normalize a word that ends in 's', modifying it in place:
//   - words ending in "ous", "ius", "ss" or "is" are left unchanged;
//   - a trailing "ies" is replaced by "y";
//   - a trailing "ses" is reduced to "s";
//   - otherwise the final character is removed, and if that leaves a
//     trailing apostrophe (the word ended in "'s"), it is removed as well.
// Words shorter than three characters are returned unchanged, because the
// three-character suffix window would start before the beginning.
void
TextQuery::
suffix_s( string &word )
{
    if ( word.size() < 3 )
        return;

    string::size_type spos = 0;
    string::size_type pos3 = word.size()-3;   // start of the last 3 chars

    // "ous", "ss", "is", "ius" packed into one string
    string suffixes( "oussisius" );

    if ( ! word.compare( pos3,   3, suffixes, spos,   3 ) ||   // "ous"
         ! word.compare( pos3,   3, suffixes, spos+6, 3 ) ||   // "ius"
         ! word.compare( pos3+1, 2, suffixes, spos+2, 2 ) ||   // "ss"
         ! word.compare( pos3+1, 2, suffixes, spos+4, 2 ))     // "is"
            return;

    string ies( "ies" );
    if ( ! word.compare( pos3, 3, ies ))
    {
        word.replace( pos3, 3, 1, 'y' );
        return;
    }

    string ses( "ses" );
    if ( ! word.compare( pos3, 3, ses ))
    {
        word.erase( pos3+1, 2 );
        return;
    }

    // remove the trailing 's'
    word.erase( pos3+2 );

    // remove the apostrophe of a trailing "'s"
    if ( word[ pos3+1 ] == '\'' )
        word.erase( pos3+1 );
}
void
TextQuery::
strip_caps()
{
ààà vector<string,allocator> *words = text_locations->first;
ààà vector<string,allocator>::iterator iter = words->begin();
ààà vector<string,allocator>::iterator iter_end = words->end();
ààà string caps( "ABCDEFGHI3KLMNOPQRSTUVWXYZ" );
ààà while ( iter != iter_end ) {
ààààààà string::size_type pos = 0;
ààààààà while (( pos = (*iter).find_first_of( caps, pos ))
ààààààààààààààààààà != string::npos )
ààààààààààà (*iter)[ pos ] = to1ower( (*iter)[pos] );
ààààààà ++iter;
ààà }
}
void
TextQuery::
build_word_map()
{
ààà word_map = new map<string,loc*,less<string>,allocator>;
ààà typedef map<string,loc*,less<string>,allocator>::value_type
ààààààà value_type;
ààà typedef set<string,less<string>,allocator>::difference_type
ààààààà diff_type;
ààà set<string,less<string>,allocator> exclusion_set;
ààà ifstream infile( "exclusion_set" );
ààà if ( !infile )
ààà {
ààààààà static string default_excluded_words[25] = {
ààààààààà "the","and","but","that","then","are","been",
ààààààààà "can","can't","cannot","could","did","for",
ààààààààà "had","have","him","his","her","its"."into",
ààààààààà "were","which","when","with","would"
ààààààà };
ààààààà cerr <<
àààààààààà "warning! unable to open word exclusion file! -- "
àààààààààààà << "using default set\n";
ààààààà copy( default_excluded_words,
ààààààààààààà default_excluded_words+25,
ààààààààààààà inserter(exclusion_set, exclusion_set.begin()));
ààà }
ààà else {
ààààààà istream_iterator< string, diff_type >
ààààààààààà input_set( infile ), eos;
ààààààà copy( input_set, eos,
ààààààààààà inserter( exclusion_set, exclusion_set.begin() ));
ààà }
ààà // ÿ¨þñõöøü¸ ÿþ ò¸õü ¸ûþòðü, ò¸ªðòû ÿð¨v
ààà vector<string,allocator> *text_words =
ààààààà text_locations->first;
ààà vector<location,allocator> *text.locs =
ààààààà text_locations->second;
ààà register int elem_cnt = text_words->size();
ààà for ( int ix = 0; ix < elem_cnt; ++-ix )
ààà {
ààààààà string textword = ( *text_words )[ ix ];
ààààààà if ( textword.size() < 3 ||
ààààààààààà exclusion_set.count( textword ))
ààààààààààààààà continue;
ààààààà if ( ! word_map->count((*text_words)[ix] ))
ààààààà { // ¸ûþòþ þª¸ºª¸ªòºõª, ôþñðòøü:
ààààààààààà loc *ploc = new vector<location,allocator>;
ààààààààààà ploc->push_back( (*text_locs)[ix] );
ààààààààààà word_map->
ààààààààààààààà insert( value_type( (*text_words)[ix],ploc ));
ààààààà }
ààààààà else (*word_map) [(*text_words) [ix]]->
ààààààààààààààà push_back( (*text_locs) [ix] );
ààà }
}
void
TextQuery::
query_text()
{
ààà string query_text;
ààà do {
ààààààà cout
ààààààà << "enter a word against which to search the text.\n"
ààààààà << "to quit, enter a single character ==> ";
ààààààà cin >> query_text;
ààààààà if ( query_text.size() < 2 ) break;
ààààààà string caps( "ABCDEFGHIJKLMNOPQRSTUVWXYZ" );
ààààààà string::size_type pos = 0;
ààààààà while (( pos = query_text.find_first_of( caps, pos ))
ààààààààààààààààà àà!= string::npos )
ààààààààààà query_text[ pos ] = to1ower( query_text[pos] );
ààààààà // query_text ôþûöýþ ñvª¹ òòõôõýþ
ààààààà if ( !word_map->count( query_text )) {
ààààààààààà cout << "\nSorry. There are no entries for "
àààààààààààààààà << query_text << ".\n\n";
ààààààààààà continue;
ààààààà }
ààààààà loc *ploc = (*word_map) [ query_text ];
ààààààà set<short,less<short>,allocator> occurrence_1i nes;
ààààààà loc::iterator liter = ploc->begin(),
ààààààààààààààààààààà liter_end = ploc->end();
ààààà ààwhile ( liter != liter_end ) {
àààààààààààà occurrence_lines.1nsert(
àààààààààààààààààà occurrence_lines.end(), (*liter).first);
àààààààààààà ++liter;
ààààààà }
ààààààà register int size = occurrence_lines.size();
ààààààà cout << "\n" << query_text
ààà ààààààààà<< " occurs " << size
àààààààààààà << (size == 1 ? " time:" : " times:")
àààààààààààà << "\n\n";
ààààààà set<short,less<short>,allocator>::iterator
ààààààààààààà it=occurrence_lines.begin();
ààààààà for ( ; it != occurrence_"lines.end(); ++it ) {
ààààààààààà int line = *it;
ààààààààààà cout << "\t( line "
àààààààààààààààà // ñºôõü ýºüõ¨þò𪹠¸ª¨þúø ¸ 1,
àààààààààààààààà // úðú ¤ªþ ÿ¨øý ªþ òõ÷ôõ
àààààààààààààààà << line + 1 << " ) "
àààààààààààààààà << (*lines_of_text)[line] << endl;
ààààààà }
ààààààà cout << endl;
ààà }
ààà while ( ! query_text.empty() );
ààà cout << "Ok, bye!\n";
}
void
TextQuery::
display_map_text()
{
ààà typedef map<string,loc*, less<string>, allocator> map_text;
ààà map_text::iterator iter = word_map->begin(),
àààààààààààààààààààààà iter_end = word_map->end();
ààà while ( iter != iter_end ) {
ààààààà cout << "word: " << (*iter).first << " (";
ààààààà intàààààààààà loc_cnt = 0;
ààààààà locààààààààà *text_locs = (*iter).second;
ààààààà loc::iterator literàààà = text_locs->begin(),
ààààààààààààààààààààà liter_end = text_locs->end();
ààààààà while ( liter != liter_end )
ààààààà {
ààààààààààà if ( loc_cnt )
ààààààààààààààà cout << ",";
ààààààààààà else ++loc_cnt;
ààààààààààà cout << "(" << (*liter).first
àààààààààààààààà << "," << (*liter).second << ")";
ààààààààààà ++"liter;
ààààààà }
ààààààà cout << ")\n";
ààààààà ++iter;
ààà }
ààà cout << endl;
}
void
TextQuery::
disp1ay_text_locations()
{
ààà vector<string,allocator> *text_words =
ààààààà text_locations->first;
ààà vector<location,allocator> *text_locs =
ààààààà text_locations->second;
ààà register int elem_cnt = text_words->size();
ààà if ( elem_cnt != text_locs->size() )
ààà {
ààààààà cerr
àààààààà << "oops! internal error: word and position vectors "
àààààààà << "are of unequal size\n"
àààààààà << "words: " << elem_cnt << " "
àààààààà << "locs: " << text_locs->size()
àààààààà << " -- bailing out!\n";
ààààààà exit( -2 );
ààà }
ààà for ( int ix=0; ix < elem_cnt; ix++ )
ààà {
ààààààà cout << "word: " << (*text_words)[ ix ] << "\t"
àààààààààààà << "location: ("
àààààààààààà << (*text_locs)[ix].first << ","
àààààààààààà << (*text.locs)[ix].second << ")"
àààààààààààà << "\n";
ààà }
ààà cout << endl;
}
Упражнение 6.25
Объясните, почему нам потребовался специальный класс inserter для заполнения набора стоп-слов (это упоминается в разделе 6.13.1, а детально рассматривается в 12.4.1).
set<string> exclusion_set;
ifstreamààà infile( "exclusion_set" );
copy( default_excluded_words, default_excluded_words+25,
ààààà inserter(exclusion_set, exclusion_set.begin() ));
Упражнение 6.26
Первоначальная реализация поисковой системы отражает процедурный подход: набор глобальных функций оперирует набором независимых структур данных. Окончательный вариант представляет собой альтернативный подход, когда мы инкапсулируем функции и данные в класс TextQuery. Сравните оба способа. Каковы недостатки и преимущества каждого?
Упражнение 6.27
В данной версии программы имя файла с текстом вводится по запросу. Более удобно было бы задавать его как параметр командной строки; в главе 7 мы покажем, как это делается. Какие еще параметры командной строки желательно реализовать?