/*
 * Copyright 2013-2014 Haiku Inc. All rights reserved.
 * Distributed under the terms of the MIT License.
 *
 * Authors:
 *		François Revol, revol@free.fr
 */
#include <assert.h>
#include <ctype.h>
#include <stdlib.h>
#include <stdio.h>
#include <Directory.h>
#include <DynamicBuffer.h>
#include <File.h>
#include <GopherRequest.h>
#include <NodeInfo.h>
#include <Path.h>
#include <Socket.h>
#include <StackOrHeapArray.h>
#include <String.h>
#include <StringList.h>
using namespace BPrivate::Network;
/*
 * TODO: fix '+' in selectors, cf. gopher://gophernicus.org/1/doc/gopher/
 * TODO: add proper favicon
 * TODO: add proper dir and document icons
 * TODO: correctly eat the extraneous .\r\n at end of text files
 * TODO: move parsing stuff to a translator?
 *
 * docs:
 * gopher://gopher.floodgap.com/1/gopher/tech
 * gopher://gopher.floodgap.com/0/overbite/dbrowse?pluginm%201
 *
 * tests:
 * gopher://sdf.org/1/sdf/historical		images
 * gopher://gopher.r-36.net/1/			large photos
 * gopher://sdf.org/1/sdf/classes		binaries
 * gopher://sdf.org/1/users/			long page
 * gopher://jgw.mdns.org/1/			search items
 * gopher://jgw.mdns.org/1/MISC/			's' item (sound)
 * gopher://gopher.floodgap.com/1/gopher		broken link
 * gopher://sdf.org/1/maps/m			missing lines
 * gopher://sdf.org/1/foo			gophernicus reports errors incorrectly
 * gopher://gopher.floodgap.com/1/foo		correct error report
 */
/*
 * Gopher item types. Each value is the single character that starts a menu
 * line and that follows the leading '/' in the path of a gopher URL.
 */
typedef enum {
	GOPHER_TYPE_NONE = 0,		/* no type known (yet) */
	GOPHER_TYPE_ENDOFPAGE = '.',	/* produces no output in a listing */
	GOPHER_TYPE_TEXTPLAIN = '0',
	GOPHER_TYPE_DIRECTORY = '1',
	GOPHER_TYPE_CSO_SEARCH = '2',
	GOPHER_TYPE_ERROR = '3',
	GOPHER_TYPE_BINHEX = '4',
	GOPHER_TYPE_BINARCHIVE = '5',
	GOPHER_TYPE_UUENCODED = '6',
	GOPHER_TYPE_QUERY = '7',
	GOPHER_TYPE_TELNET = '8',
	GOPHER_TYPE_BINARY = '9',
	GOPHER_TYPE_DUPSERV = '+',
	GOPHER_TYPE_GIF = 'g',
	GOPHER_TYPE_IMAGE = 'I',
	GOPHER_TYPE_TN3270 = 'T',
	/* Further item types,
	 * cf. http://en.wikipedia.org/wiki/Gopher_%28protocol%29#Gopher_item_types
	 */
	GOPHER_TYPE_HTML = 'h',
	GOPHER_TYPE_INFO = 'i',
	GOPHER_TYPE_AUDIO = 's',
	GOPHER_TYPE_DOC = 'd',
	GOPHER_TYPE_PNG = 'p',
	GOPHER_TYPE_MIME = 'M',
	GOPHER_TYPE_PDF = 'P',
	GOPHER_TYPE_BITMAP = ':',
	GOPHER_TYPE_MOVIE = ';',
	GOPHER_TYPE_SOUND = '<',
	GOPHER_TYPE_CALENDAR = 'c',
	GOPHER_TYPE_EVENT = 'e',
	GOPHER_TYPE_MBOX = 'm',
} gopher_item_type;
/*
 * Indices of the tab-separated fields of a gopher menu line, counted after
 * the leading item type character has been taken off the line.
 */
typedef enum {
	FIELD_NAME = 0,
	FIELD_SELECTOR = 1,
	FIELD_HOST = 2,
	FIELD_PORT = 3,
	FIELD_GPFLAG = 4,
	FIELD_EOL = 5,
	/* alias of FIELD_EOL */
	FIELD_COUNT = FIELD_EOL
} gopher_field;
/*
 * Table mapping gopher item types to the MIME type reported for the response.
 * The list is terminated by a GOPHER_TYPE_NONE entry; item types that are not
 * listed are reported as "application/octet-stream" by _ProtocolLoop().
 */
static struct {
gopher_item_type type;
const char *mime;
} gopher_type_map[] = {
{ GOPHER_TYPE_TEXTPLAIN, "text/plain" },
// Directory and query replies are converted to HTML by _ParseInput().
{ GOPHER_TYPE_DIRECTORY, "text/html;charset=UTF-8" },
{ GOPHER_TYPE_QUERY, "text/html;charset=UTF-8" },
{ GOPHER_TYPE_GIF, "image/gif" },
{ GOPHER_TYPE_HTML, "text/html" },
{ GOPHER_TYPE_PDF, "application/pdf" },
{ GOPHER_TYPE_PNG, "image/png"},
// End-of-table marker.
{ GOPHER_TYPE_NONE, NULL }
};
/*
 * CSS embedded in the <style> element of the HTML header that _ParseInput()
 * generates for directory and query listings. The selectors match the
 * body id ("gopher") and the span/div classes emitted there.
 */
static const char *kStyleSheet = "\n"
"/*\n"
" * gopher listing style\n"
" */\n"
"\n"
"body#gopher {\n"
" /* margin: 10px;*/\n"
" background-color: Window;\n"
" color: WindowText;\n"
" font-size: 100%;\n"
" padding-bottom: 2em; }\n"
"\n"
"body#gopher div.uplink {\n"
" padding: 0;\n"
" margin: 0;\n"
" position: fixed;\n"
" top: 5px;\n"
" right: 5px; }\n"
"\n"
"body#gopher h1 {\n"
" padding: 5mm;\n"
" margin: 0;\n"
" border-bottom: 2px solid #777; }\n"
"\n"
"body#gopher span {\n"
" margin-left: 1em;\n"
" padding-left: 2em;\n"
" font-family: 'Noto Sans Mono', Courier, monospace;\n"
" word-wrap: break-word;\n"
" white-space: pre-wrap; }\n"
"\n"
"body#gopher span.error {\n"
" color: #f00; }\n"
"\n"
"body#gopher span.unknown {\n"
" color: #800; }\n"
"\n"
"body#gopher span.dir {\n"
" background-image: url('resource:icons/directory.png');\n"
" background-repeat: no-repeat;\n"
" background-position: bottom left; }\n"
"\n"
"body#gopher span.text {\n"
" background-image: url('resource:icons/content.png');\n"
" background-repeat: no-repeat;\n"
" background-position: bottom left; }\n"
"\n"
"body#gopher span.query {\n"
" background-image: url('resource:icons/search.png');\n"
" background-repeat: no-repeat;\n"
" background-position: bottom left; }\n"
"\n"
"body#gopher span.img img {\n"
" display: block;\n"
" margin-left:auto;\n"
" margin-right:auto; }\n";
// Number of bytes requested from the socket per read in _ProtocolLoop().
static const int32 kGopherBufferSize = 4096;
// When true, image items in generated listings embed an <img> element in
// addition to the plain link (see _ParseInput()).
static const bool kInlineImages = true;
/*
 * Creates the request and derives the item type and selector from the URL
 * path: an absent, empty or "/" path selects the server's root directory,
 * while "/<type><selector>" is split into its item type character and the
 * selector that follows it. Any other path keeps GOPHER_TYPE_NONE and is
 * used as the selector unchanged.
 */
BGopherRequest::BGopherRequest(const BUrl& url, BDataIO* output,
	BUrlProtocolListener* listener, BUrlContext* context)
	:
	BNetworkRequest(url, output, listener, context, "BUrlProtocol.Gopher",
		"gopher"),
	fItemType(GOPHER_TYPE_NONE),
	fPosition(0),
	fResult()
{
	// A failed allocation is reported later by _ProtocolLoop().
	fSocket = new(std::nothrow) BSocket();

	fPath = Url().Path();

	const bool isRootRequest = !Url().HasPath() || fPath.Length() == 0
		|| fPath == "/";
	if (isRootRequest) {
		fItemType = GOPHER_TYPE_DIRECTORY;
		fPath = "";
		return;
	}

	if (fPath.Length() > 1 && fPath[0] == '/') {
		// Path is "/<type><selector>": keep the type, drop both characters.
		fItemType = fPath[1];
		fPath.Remove(0, 2);
	}
}
/*
 * Stops the request (which disconnects the socket if there is one) and then
 * frees the socket allocated by the constructor.
 */
BGopherRequest::~BGopherRequest()
{
Stop();
delete fSocket;
}
/*
 * Disconnects the socket, if one was allocated, and then forwards to
 * BNetworkRequest::Stop(), whose status is returned.
 */
status_t
BGopherRequest::Stop()
{
	if (fSocket != NULL)
		fSocket->Disconnect();

	return BNetworkRequest::Stop();
}
/*
 * Returns the result object of this request; _ProtocolLoop() stores the
 * content type and the total length in it.
 */
const BUrlResult&
BGopherRequest::Result() const
{
return fResult;
}
/*
 * Runs one complete gopher transaction: resolves the host, connects, sends
 * the selector and then reads the reply until the peer closes the connection,
 * an error occurs or the request is asked to quit.
 *
 * The first data received is inspected for error replies before the content
 * type is published to the listener. Directory and query replies are handed
 * to _ParseInput(); every other item type is forwarded unmodified to the
 * output.
 *
 * Returns B_NO_MEMORY if the socket or a scratch buffer could not be
 * allocated, B_SERVER_NOT_FOUND if the host name did not resolve, the
 * connect/read/write/parse error if one occurred, B_INTERRUPTED if the
 * request was stopped, and B_OK otherwise.
 */
status_t
BGopherRequest::_ProtocolLoop()
{
	if (fSocket == NULL)
		return B_NO_MEMORY;

	// Port 70 is used when the URL does not carry a port.
	if (!_ResolveHostName(fUrl.Host(), fUrl.HasPort() ? fUrl.Port() : 70)) {
		_EmitDebug(B_URL_PROTOCOL_DEBUG_ERROR,
			"Unable to resolve hostname (%s), aborting.",
			fUrl.Host().String());
		return B_SERVER_NOT_FOUND;
	}

	_EmitDebug(B_URL_PROTOCOL_DEBUG_TEXT, "Connection to %s on port %d.",
		fUrl.Authority().String(), fRemoteAddr.Port());
	status_t connectError = fSocket->Connect(fRemoteAddr);
	if (connectError != B_OK) {
		_EmitDebug(B_URL_PROTOCOL_DEBUG_ERROR, "Socket connection error %s",
			strerror(connectError));
		return connectError;
	}

	if (fListener != NULL)
		fListener->ConnectionOpened(this);

	_EmitDebug(B_URL_PROTOCOL_DEBUG_TEXT,
		"Connection opened, sending request.");

	_SendRequest();
	_EmitDebug(B_URL_PROTOCOL_DEBUG_TEXT, "Request sent.");

	bool receiveEnd = false;
	status_t readError = B_OK;
	ssize_t bytesRead = 0;
	bool dataValidated = false;
	BStackOrHeapArray<char, 4096> chunk(kGopherBufferSize);

	while (!fQuit && !receiveEnd) {
		bytesRead = fSocket->Read(chunk, kGopherBufferSize);
		if (bytesRead < 0) {
			readError = bytesRead;
			break;
		} else if (bytesRead == 0)
			receiveEnd = true;

		fInputBuffer.AppendData(chunk, bytesRead);

		if (!dataValidated) {
			size_t i;

			// A reply that starts with '3' and whose first line has three or
			// four tabs followed by a line break, with only printable
			// characters, is treated as an error item: switch to directory
			// mode so that the error text gets rendered as a listing.
			if (fInputBuffer.Size() && fInputBuffer.Data()[0] == '3') {
				int tabs = 0;
				bool crlf = false;

				for (i = 0; i < fInputBuffer.Size(); i++) {
					char c = fInputBuffer.Data()[i];
					if (c == '\t') {
						if (!crlf)
							tabs++;
					} else if (c == '\r' || c == '\n') {
						if (tabs < 3)
							break;
						crlf = true;
					} else if (!isprint((unsigned char)c)) {
						crlf = false;
						break;
					}
				}
				if (crlf && tabs > 2 && tabs < 5) {
					fItemType = GOPHER_TYPE_DIRECTORY;
					readError = B_RESOURCE_NOT_FOUND;
				}
			}

			// Replies starting with this plain-text message are handled the
			// same way as a proper error item.
			static const char *buggy = "Error: File or directory not found!";
			if (fInputBuffer.Size() > strlen(buggy)
				&& !memcmp(fInputBuffer.Data(), buggy, strlen(buggy))) {
				fItemType = GOPHER_TYPE_DIRECTORY;
				readError = B_RESOURCE_NOT_FOUND;
			}

			// Only the first data received is ever inspected.
			dataValidated = true;

			if (fListener != NULL)
				fListener->ResponseStarted(this);

			// Publish the content type matching the (possibly adjusted)
			// item type before any data is delivered.
			const char *mime = "application/octet-stream";
			for (i = 0; gopher_type_map[i].type != GOPHER_TYPE_NONE; i++) {
				if (gopher_type_map[i].type == fItemType) {
					mime = gopher_type_map[i].mime;
					break;
				}
			}
			fResult.SetContentType(mime);

			if (fListener != NULL)
				fListener->HeadersReceived(this);
		}

		if (_NeedsParsing()) {
			// NOTE(review): this assignment also replaces a
			// B_RESOURCE_NOT_FOUND set by the validation above, so a rendered
			// error page ends with B_OK. Kept as is; confirm whether callers
			// rely on it.
			readError = _ParseInput(receiveEnd);

			// Stop on failure so that the error is reported instead of being
			// overwritten by a later successful pass.
			if (readError != B_OK)
				break;
		} else if (fInputBuffer.Size()) {
			const size_t pending = fInputBuffer.Size();

			if (fOutput != NULL) {
				size_t written = 0;
				readError = fOutput->WriteExactly(
					(const char*)fInputBuffer.Data(), pending, &written);
				if (fListener != NULL && written > 0)
					fListener->BytesWritten(this, written);
				if (readError != B_OK)
					break;
			}

			fPosition += pending;
			if (fListener != NULL)
				fListener->DownloadProgress(this, fPosition, 0);

			// Empty the input buffer. RemoveData() copies the removed bytes
			// out, so the scratch buffer must hold everything that is
			// pending, not just what the last read returned.
			char *inputTempBuffer = new(std::nothrow) char[pending];
			if (inputTempBuffer == NULL) {
				readError = B_NO_MEMORY;
				break;
			}
			fInputBuffer.RemoveData(inputTempBuffer, pending);
			delete[] inputTempBuffer;
		}
	}

	if (fPosition > 0)
		fResult.SetLength(fPosition);

	fSocket->Disconnect();

	if (readError != B_OK)
		return readError;

	return fQuit ? B_INTERRUPTED : B_OK;
}
void
BGopherRequest::_SendRequest()
{
BString request;
request << fPath;
if (Url().HasRequest())
request << '\t' << Url().Request();
request << "\r\n";
fSocket->Write(request.String(), request.Length());
}
/*
 * Tells whether the reply has to be converted by _ParseInput() instead of
 * being forwarded as is; true for directory and query items.
 */
bool
BGopherRequest::_NeedsParsing()
{
	return fItemType == GOPHER_TYPE_DIRECTORY
		|| fItemType == GOPHER_TYPE_QUERY;
}
/*
 * Returns true for directory, query and plain text items, false for every
 * other item type.
 */
bool
BGopherRequest::_NeedsLastDotStrip()
{
	return fItemType == GOPHER_TYPE_DIRECTORY
		|| fItemType == GOPHER_TYPE_QUERY
		|| fItemType == GOPHER_TYPE_TEXTPLAIN;
}
status_t
BGopherRequest::_ParseInput(bool last)
{
BString line;
while (_GetLine(line) == B_OK) {
char type = GOPHER_TYPE_NONE;
BStringList fields;
line.MoveInto(&type, 0, 1);
line.Split("\t", false, fields);
if (type != GOPHER_TYPE_ENDOFPAGE
&& fields.CountStrings() < FIELD_GPFLAG)
_EmitDebug(B_URL_PROTOCOL_DEBUG_TEXT,
"Unterminated gopher item (type '%c')", type);
BString pageTitle;
BString item;
BString title = fields.StringAt(FIELD_NAME);
BString link("gopher://");
BString user;
if (fields.CountStrings() > 3) {
link << fields.StringAt(FIELD_HOST);
if (fields.StringAt(FIELD_PORT).Length())
link << ":" << fields.StringAt(FIELD_PORT);
link << "/" << type;
link << fields.StringAt(FIELD_SELECTOR);
}
_HTMLEscapeString(title);
_HTMLEscapeString(link);
switch (type) {
case GOPHER_TYPE_ENDOFPAGE:
break;
case GOPHER_TYPE_TEXTPLAIN:
item << "<a href=\"" << link << "\">"
"<span class=\"text\">" << title << "</span></a>"
"<br/>\n";
break;
case GOPHER_TYPE_BINARY:
case GOPHER_TYPE_BINHEX:
case GOPHER_TYPE_BINARCHIVE:
case GOPHER_TYPE_UUENCODED:
item << "<a href=\"" << link << "\">"
"<span class=\"binary\">" << title << "</span></a>"
"<br/>\n";
break;
case GOPHER_TYPE_DIRECTORY:
* directory link
*/
item << "<a href=\"" << link << "\">"
"<span class=\"dir\">" << title << "</span></a>"
"<br/>\n";
break;
case GOPHER_TYPE_ERROR:
item << "<span class=\"error\">" << title << "</span>"
"<br/>\n";
if (fPosition == 0 && pageTitle.Length() == 0)
pageTitle << "Error: " << title;
break;
case GOPHER_TYPE_QUERY:
* For now we use an unnamed input field and accept sending ?=foo
* as it seems at least Veronica-2 ignores the = but it's unclean.
*/
item << "<form method=\"get\" action=\"" << link << "\" "
"onsubmit=\"window.location = this.action + '?' + "
"this.elements['q'].value; return false;\">"
"<span class=\"query\">"
"<label>" << title << " "
"<input id=\"q\" name=\"\" type=\"text\" align=\"right\" />"
"</label>"
"</span></form>"
"<br/>\n";
break;
case GOPHER_TYPE_TELNET:
* cf. gopher://78.80.30.202/1/ps3
* -> gopher://78.80.30.202:23/8/ps3/new -> new@78.80.30.202
*/
link = "telnet://";
user = fields.StringAt(FIELD_SELECTOR);
if (user.FindLast('/') > -1) {
user.Remove(0, user.FindLast('/'));
link << user << "@";
}
link << fields.StringAt(FIELD_HOST);
if (fields.StringAt(FIELD_PORT) != "23")
link << ":" << fields.StringAt(FIELD_PORT);
item << "<a href=\"" << link << "\">"
"<span class=\"telnet\">" << title << "</span></a>"
"<br/>\n";
break;
case GOPHER_TYPE_TN3270:
link = "tn3270://";
user = fields.StringAt(FIELD_SELECTOR);
if (user.FindLast('/') > -1) {
user.Remove(0, user.FindLast('/'));
link << user << "@";
}
link << fields.StringAt(FIELD_HOST);
if (fields.StringAt(FIELD_PORT) != "23")
link << ":" << fields.StringAt(FIELD_PORT);
item << "<a href=\"" << link << "\">"
"<span class=\"telnet\">" << title << "</span></a>"
"<br/>\n";
break;
case GOPHER_TYPE_CSO_SEARCH:
* At least Lynx supports a cso:// URI scheme:
* http://lynx.isc.org/lynx2.8.5/lynx2-8-5/lynx_help/lynx_url_support.html
*/
link = "cso://";
user = fields.StringAt(FIELD_SELECTOR);
if (user.FindLast('/') > -1) {
user.Remove(0, user.FindLast('/'));
link << user << "@";
}
link << fields.StringAt(FIELD_HOST);
if (fields.StringAt(FIELD_PORT) != "105")
link << ":" << fields.StringAt(FIELD_PORT);
item << "<a href=\"" << link << "\">"
"<span class=\"cso\">" << title << "</span></a>"
"<br/>\n";
break;
case GOPHER_TYPE_GIF:
case GOPHER_TYPE_IMAGE:
case GOPHER_TYPE_PNG:
case GOPHER_TYPE_BITMAP:
if (kInlineImages) {
item << "<a href=\"" << link << "\">"
"<span class=\"img\">" << title << " "
"<img src=\"" << link << "\" "
"alt=\"" << title << "\"/>"
"</span></a>"
"<br/>\n";
break;
}
item << "<a href=\"" << link << "\">"
"<span class=\"img\">" << title << "</span></a>"
"<br/>\n";
break;
case GOPHER_TYPE_HTML:
if (fields.StringAt(FIELD_SELECTOR).StartsWith("URL:")) {
link = fields.StringAt(FIELD_SELECTOR);
link.Remove(0, 4);
}
item << "<a href=\"" << link << "\">"
"<span class=\"html\">" << title << "</span></a>"
"<br/>\n";
break;
case GOPHER_TYPE_INFO:
if (fPosition == 0 && pageTitle.Length() == 0
&& fields.StringAt(FIELD_SELECTOR) == "TITLE") {
pageTitle = title;
break;
}
item << "<span class=\"info\">" << title << "</span>"
"<br/>\n";
break;
case GOPHER_TYPE_AUDIO:
case GOPHER_TYPE_SOUND:
item << "<a href=\"" << link << "\">"
"<span class=\"audio\">" << title << "</span></a>"
"<audio src=\"" << link << "\" "
"alt=\"" << title << "\"/>"
"<span>[player]</span></audio>"
"<br/>\n";
break;
case GOPHER_TYPE_PDF:
case GOPHER_TYPE_DOC:
item << "<a href=\"" << link << "\">"
"<span class=\"document\">" << title << "</span></a>"
"<br/>\n";
break;
case GOPHER_TYPE_MOVIE:
item << "<a href=\"" << link << "\">"
"<span class=\"video\">" << title << "</span></a>"
"<video src=\"" << link << "\" "
"alt=\"" << title << "\"/>"
"<span>[player]</span></audio>"
"<br/>\n";
break;
default:
_EmitDebug(B_URL_PROTOCOL_DEBUG_TEXT,
"Unknown gopher item (type 0x%02x '%c')", type, type);
item << "<a href=\"" << link << "\">"
"<span class=\"unknown\">" << title << "</span></a>"
"<br/>\n";
break;
}
if (fPosition == 0) {
if (pageTitle.Length() == 0)
pageTitle << "Index of " << Url();
const char *uplink = ".";
if (fPath.EndsWith("/"))
uplink = "..";
BString header;
header <<
"<html>\n"
"<head>\n"
"<meta http-equiv=\"Content-Type\""
" content=\"text/html; charset=UTF-8\" />\n"
"<style type=\"text/css\">\n" << kStyleSheet << "</style>\n"
"<title>" << pageTitle << "</title>\n"
"</head>\n"
"<body id=\"gopher\">\n"
"<div class=\"uplink dontprint\">\n"
"<a href=" << uplink << ">[up]</a>\n"
"<a href=\"/\">[top]</a>\n"
"</div>\n"
"<h1>" << pageTitle << "</h1>\n";
if (fOutput != NULL) {
size_t written = 0;
status_t error = fOutput->WriteExactly(header.String(),
header.Length(), &written);
if (fListener != NULL && written > 0)
fListener->BytesWritten(this, written);
if (error != B_OK)
return error;
}
fPosition += header.Length();
if (fListener != NULL)
fListener->DownloadProgress(this, fPosition, 0);
}
if (item.Length()) {
if (fOutput != NULL) {
size_t written = 0;
status_t error = fOutput->WriteExactly(item.String(),
item.Length(), &written);
if (fListener != NULL && written > 0)
fListener->BytesWritten(this, written);
if (error != B_OK)
return error;
}
fPosition += item.Length();
if (fListener != NULL)
fListener->DownloadProgress(this, fPosition, 0);
}
}
if (last) {
BString footer =
"</div>\n"
"</body>\n"
"</html>\n";
if (fListener != NULL) {
size_t written = 0;
status_t error = fOutput->WriteExactly(footer.String(),
footer.Length(), &written);
if (fListener != NULL && written > 0)
fListener->BytesWritten(this, written);
if (error != B_OK)
return error;
}
fPosition += footer.Length();
if (fListener != NULL)
fListener->DownloadProgress(this, fPosition, 0);
}
return B_OK;
}
/*
 * Escapes the characters that are special in HTML text and in double-quoted
 * attribute values, in place.
 *
 * str - string to escape; modified.
 * Returns a reference to str.
 */
BString&
BGopherRequest::_HTMLEscapeString(BString &str)
{
	// '&' has to go first, otherwise the ampersands introduced by the other
	// replacements would be escaped a second time.
	str.ReplaceAll("&", "&amp;");
	str.ReplaceAll("<", "&lt;");
	str.ReplaceAll(">", "&gt;");
	// Escaped strings are also placed inside href="..." and alt="...".
	str.ReplaceAll("\"", "&quot;");
	return str;
}