Arquivos
hhvm/hphp/runtime/base/file/file.cpp
T
Drew Paroski 84b9d9a3a2 Separate resources from objects, part 1
In HHVM (and HPHPc before it) we've been piggybacking resources on the
KindOfObject machinery. At the language level, resource is considered to
be a different type than object, and there are a number of differences
in behavior between objects and resources (ex. resources don't allow for
dynamic properties, resources don't work with the clone operator, the
"(object)" cast behaves differently for resources vs. objects, etc).

Piggybacking resources on the KindOfObject machinery has some downsides.
Code that deals with KindOfObject values often needs to check if the value
is a resource and go down a different code path. This makes things harder
to maintain and harder to keep parity with Zend. Also, these extra branches
hurt performance a little, and they make it harder for the JIT to do a good
job in some cases when it's generating machine code that operates on objects.

This diff prepares the code base for a new KindOfResource type by adding a
new "Resource" smart pointer type (currently a typedef for the Object smart
pointer type) and it updates the C++ code and the idl files appropriately.
This diff is essentially a cosmetic change and should not impact run time
behavior. In the next diff (part 2) we'll actually add a new KindOfResource
type, detach ResourceData from the ObjectData inheritance hierarchy, and
provide a real implementation for the Resource smart pointer type (instead
of just aliasing the Object smart pointer type).
2013-07-10 11:16:33 -07:00

777 linhas
21 KiB
C++

/*
+----------------------------------------------------------------------+
| HipHop for PHP |
+----------------------------------------------------------------------+
| Copyright (c) 2010-2013 Facebook, Inc. (http://www.facebook.com) |
+----------------------------------------------------------------------+
| This source file is subject to version 3.01 of the PHP license, |
| that is bundled with this package in the file LICENSE, and is |
| available through the world-wide-web at the following url: |
| http://www.php.net/license/3_01.txt |
| If you did not receive a copy of the PHP license and are unable to |
| obtain it through the world-wide-web, please send a note to |
| license@php.net so we can mail you a copy immediately. |
+----------------------------------------------------------------------+
*/
#include "hphp/runtime/base/file/file.h"
#include "hphp/runtime/base/complex_types.h"
#include "hphp/runtime/base/util/string_buffer.h"
#include "hphp/runtime/base/type_conversions.h"
#include "hphp/runtime/base/builtin_functions.h"
#include "hphp/runtime/base/server/static_content_cache.h"
#include "hphp/runtime/base/server/virtual_host.h"
#include "hphp/runtime/base/runtime_option.h"
#include "hphp/runtime/base/runtime_error.h"
#include "hphp/runtime/base/array/array_init.h"
#include "hphp/util/logger.h"
#include "hphp/util/process.h"
#include "hphp/util/util.h"
#include "hphp/runtime/base/zend/zend_string.h"
#include "hphp/runtime/base/zend/zend_printf.h"
#include "hphp/runtime/base/util/exceptions.h"
#include <sys/file.h>
#include "hphp/runtime/base/array/array_iterator.h"
#include "hphp/runtime/base/file/stream_wrapper_registry.h"
namespace HPHP {
///////////////////////////////////////////////////////////////////////////////
// statics
// Definitions of File's static string members: the class name ("File") and
// the resource name ("stream").
StaticString File::s_class_name("File");
StaticString File::s_resource_name("stream");
// Defines s_file_data through the IMPLEMENT_REQUEST_LOCAL macro.
// NOTE(review): presumably one FileData instance per request -- confirm
// against the macro's definition.
IMPLEMENT_REQUEST_LOCAL(FileData, s_file_data);
// Integer flag value exposed as File::USE_INCLUDE_PATH.
// NOTE(review): presumably an open-option bit consumed by callers of
// File::Open -- confirm against its users.
const int File::USE_INCLUDE_PATH = 1;
/**
 * Canonicalizes `filename` and turns it into the path that should actually be
 * handed to the file system.
 *
 * - useFileCache: the path is first translated without the cache; if that
 *   result is non-empty, does not exist on disk, and the static file cache
 *   knows the canonicalized name, RuntimeOption::FileCache is returned
 *   instead so that stat() works on the cache file.
 * - RuntimeOption::SafeFileAccess: a path located under one of the virtual
 *   host's allowed directories is returned as-is; any other absolute path,
 *   or any path still containing "..", yields "".
 * - Otherwise absolute paths (or any path when keepRelative is set) are
 *   returned canonicalized, and relative paths are prefixed with the current
 *   working directory.
 */
String File::TranslatePath(CStrRef filename, bool useFileCache /* = false */,
                           bool keepRelative /*= false */) {
  String canonicalized(Util::canonicalize(filename.data(), filename.size()),
                       AttachString);

  if (useFileCache) {
    String resolved = TranslatePath(canonicalized, false);
    const bool missingOnDisk =
      !resolved.empty() && access(resolved.data(), F_OK) < 0;
    if (missingOnDisk && StaticContentCache::TheFileCache &&
        StaticContentCache::TheFileCache->exists(canonicalized.data(),
                                                 false)) {
      // we use file cache's file name to make stat() work
      resolved = String(RuntimeOption::FileCache);
    }
    return resolved;
  }

  if (RuntimeOption::SafeFileAccess) {
    const vector<string> &allowed = VirtualHost::GetAllowedDirectories();
    // upper_bound finds the first entry ordered after the path, so the entry
    // just before it is the only possible prefix candidate.
    // NOTE(review): this relies on the list being sorted -- confirm.
    auto next = std::upper_bound(allowed.begin(), allowed.end(),
                                 canonicalized,
                                 [](CStrRef val, const string& dir) {
                                   return strcmp(val.c_str(), dir.c_str()) < 0;
                                 });
    if (next != allowed.begin()) {
      const string& candidate = *(next - 1);
      const bool underCandidate =
        candidate.size() <= canonicalized.size() &&
        !strncmp(candidate.c_str(), canonicalized.c_str(), candidate.size());
      if (underCandidate) {
        return canonicalized;
      }
    }
    // disallow access with an absolute path
    if (canonicalized.charAt(0) == '/') {
      return "";
    }
    // unresolvable paths are all considered as unsafe
    if (canonicalized.find("..") >= 0) {
      assert(canonicalized.find("..") == 0);
      return "";
    }
  }

  const bool isAbsolute = canonicalized.charAt(0) == '/';
  if (isAbsolute || keepRelative) {
    return canonicalized;
  }

  String cwd = g_context->getCwd();
  const bool cwdEndsWithSlash = !cwd.empty() && cwd[cwd.length() - 1] == '/';
  if (cwdEndsWithSlash) {
    return cwd + canonicalized;
  }
  return cwd + "/" + canonicalized;
}
/**
 * Translates a command string before it is used. No translation is performed
 * at the moment: the command is returned unchanged.
 */
String File::TranslateCommand(CStrRef cmd) {
  // TODO: security checking
  String passthrough = cmd;
  return passthrough;
}
/**
 * Returns true when a static file cache is installed and it reports
 * `filename` as an existing directory; false otherwise.
 */
bool File::IsVirtualDirectory(CStrRef filename) {
  return StaticContentCache::TheFileCache &&
         StaticContentCache::TheFileCache->dirExists(filename.data(), false);
}
/**
 * A path is "plain" when it contains no "://" scheme separator anywhere.
 */
bool File::IsPlainFilePath(CStrRef filename) {
  const auto schemePos = filename.find("://");
  return schemePos == String::npos;
}
/**
 * Opens `filename` through Stream::open().
 *
 * On success the new File records the name and mode it was opened with and
 * is returned wrapped in a Resource; on failure the result is boolean false.
 */
Variant File::Open(CStrRef filename, CStrRef mode,
                   int options /* = 0 */,
                   CVarRef context /* = null */) {
  File *opened = Stream::open(filename, mode, options, context);
  if (opened == nullptr) {
    return false;
  }
  opened->m_name = filename.data();
  opened->m_mode = mode.data();
  return Resource(opened);
}
///////////////////////////////////////////////////////////////////////////////
// constructor and destructor
/**
 * Creates a File with no descriptor (m_fd == -1), not closed, all buffer
 * offsets and the logical position at zero, and no read buffer allocated.
 */
File::File(bool nonblocking)
  : m_fd(-1),
    m_closed(false),
    m_nonblocking(nonblocking),
    m_writepos(0),
    m_readpos(0),
    m_position(0),
    m_buffer(nullptr) {
}
// Destructor: releases the read buffer by calling File::closeImpl().
File::~File() {
closeImpl();
}
/**
 * Frees the read buffer, if one was allocated, and clears the pointer so a
 * repeated call is harmless.
 */
void File::closeImpl() {
  // free(nullptr) is a no-op, so no explicit null check is required.
  free(m_buffer);
  m_buffer = nullptr;
}
///////////////////////////////////////////////////////////////////////////////
// default implementation of virtual functions
/**
 * Reads a single byte.
 *
 * Buffered data is served first; otherwise one byte is requested from
 * readImpl(). Returns the byte as a non-negative int, or EOF when readImpl()
 * does not deliver exactly one byte. The logical position advances by one on
 * success.
 */
int File::getc() {
  const bool haveBuffered = m_writepos > m_readpos;
  if (!haveBuffered) {
    char one[1];
    int64_t got = readImpl(one, 1);
    if (got != 1) {
      return EOF;
    }
    m_position += got;
    return (int)(unsigned char)one[0];
  }
  m_position++;
  return m_buffer[m_readpos++] & 0xff;
}
/**
 * Reads up to `length` bytes and returns them as a String.
 *
 * A non-positive length raises a notice and yields "". Data already in the
 * read buffer is consumed first; the buffer is then refilled in CHUNK_SIZE
 * pieces through readImpl() until enough bytes are available, eof() is
 * reported, a refill returns nothing, or (in non-blocking mode) after a
 * single refill. The logical position advances by the number of bytes
 * returned.
 */
String File::read(int64_t length) {
  if (length <= 0) {
    raise_notice("Invalid length %" PRId64, length);
    return "";
  }

  String result = String(length, ReserveString);
  char *out = result.mutableSlice().ptr;
  int64_t total = 0;

  int64_t buffered = m_writepos - m_readpos;
  while (buffered < length && !eof()) {
    if (m_buffer == nullptr) {
      m_buffer = (char *)malloc(CHUNK_SIZE);
    }
    // Drain whatever is still buffered before the refill overwrites it.
    if (buffered > 0) {
      memcpy(out + total, m_buffer + m_readpos, buffered);
      total += buffered;
      length -= buffered;
    }
    m_writepos = readImpl(m_buffer, CHUNK_SIZE);
    m_readpos = 0;
    buffered = m_writepos - m_readpos;
    if (buffered == 0 || m_nonblocking) {
      // For nonblocking mode, temporary out of data.
      break;
    }
  }

  buffered = m_writepos - m_readpos;
  if (buffered > 0) {
    const int64_t take = buffered < length ? buffered : length;
    memcpy(out + total, m_buffer + m_readpos, take);
    m_readpos += take;
    total += take;
  }

  m_position += total;
  return result.setSize(total);
}
/**
 * Writes `data` through writeImpl() and returns the number of bytes written.
 *
 * A `length` that is non-positive or larger than the string means "write the
 * whole string". For seekable files the read buffer is discarded first and
 * the stream is repositioned by the amount of unread buffered data. The
 * logical position advances by the number of bytes written.
 */
int64_t File::write(CStrRef data, int64_t length /* = 0 */) {
  if (seekable()) {
    // Unread buffered bytes were read ahead of the logical position; step
    // back over them before writing.
    int64_t rewindBy = m_readpos - m_writepos;
    m_readpos = m_writepos = 0; // invalidating read buffer
    seek(rewindBy, SEEK_CUR);
  }

  if (length <= 0 || length > data.size()) {
    length = data.size();
  }
  if (!length) {
    return 0;
  }

  int64_t written = writeImpl(data.data(), length);
  m_position += written;
  return written;
}
/**
 * Writes the single character `c` through writeImpl(), advances the logical
 * position by the value writeImpl() returned, and returns that value.
 */
int File::putc(char c) {
  char one[1] = { c };
  int written = writeImpl(one, 1);
  m_position += written;
  return written;
}
/**
 * Default seek: only forward, relative seeks are supported.
 *
 * Throws NotSupportedException for any `whence` other than SEEK_CUR and for
 * negative offsets. A forward seek first skips buffered data and then reads
 * and discards bytes via readImpl(); it returns false if the stream runs out
 * (or readImpl() fails) before `offset` bytes were skipped, true otherwise.
 */
bool File::seek(int64_t offset, int whence /* = SEEK_SET */) {
  if (whence != SEEK_CUR) {
    throw NotSupportedException(__func__, "cannot seek other than SEEK_CUR");
  }
  if (offset < 0) {
    throw NotSupportedException(__func__, "cannot seek backwards");
  }
  if (offset == 0) {
    return true;
  }

  int64_t buffered = m_writepos - m_readpos;
  assert(buffered >= 0);
  if (buffered >= offset) {
    // The target lies inside the read buffer.
    m_readpos += offset;
    return true;
  }
  if (buffered > 0) {
    m_readpos += buffered;
    offset -= buffered;
  }

  // Consume the remainder from the underlying stream in small pieces.
  char scratch[1024];
  while (offset) {
    int64_t want = offset > (int64_t)sizeof(scratch)
      ? (int64_t)sizeof(scratch) : offset;
    int64_t got = readImpl(scratch, want);
    if (got <= 0) {
      return false;
    }
    offset -= got;
  }
  return true;
}
// Default implementation: reporting the position is not supported here, so
// this always throws NotSupportedException.
// NOTE(review): subclasses presumably override this -- confirm in file.h.
int64_t File::tell() {
throw NotSupportedException(__func__, "cannot tell");
}
// Default implementation: end-of-file testing is not supported here, so this
// always throws NotSupportedException. read(), readLine(), readRecord() and
// getMetaData() in this file all call eof().
// NOTE(review): subclasses presumably override this -- confirm in file.h.
bool File::eof() {
throw NotSupportedException(__func__, "cannot test eof");
}
// Default implementation: rewinding is not supported here, so this always
// throws NotSupportedException.
bool File::rewind() {
throw NotSupportedException(__func__, "cannot rewind");
}
// Default implementation: does nothing and reports success.
bool File::flush() {
return true;
}
// Default implementation: truncation is not supported here, so this always
// throws NotSupportedException; `size` is ignored.
bool File::truncate(int64_t size) {
throw NotSupportedException(__func__, "cannot truncate");
}
/**
 * Convenience overload of lock() for callers that do not care whether a
 * failure was caused by the lock being held elsewhere.
 */
bool File::lock(int operation) {
  bool ignoredWouldBlock = false;
  return lock(operation, ignoredWouldBlock);
}
/**
 * Applies flock() with `operation` to the file descriptor.
 *
 * Returns true on success. On failure returns false, and `wouldblock` is set
 * to true only when errno is EWOULDBLOCK; it is false in every other case.
 * Requires a valid descriptor (asserted).
 */
bool File::lock(int operation, bool &wouldblock /* = false */) {
  assert(m_fd >= 0);
  wouldblock = false;
  if (flock(m_fd, operation) == 0) {
    return true;
  }
  wouldblock = (errno == EWOULDBLOCK);
  return false;
}
// Keys of the array built by File::getMetaData(), listed in the order in
// which that function inserts them.
const StaticString
s_wrapper_type("wrapper_type"),
s_stream_type("stream_type"),
s_mode("mode"),
s_unread_bytes("unread_bytes"),
s_seekable("seekable"),
s_uri("uri"),
s_timed_out("timed_out"),
s_blocked("blocked"),
s_eof("eof"),
s_wrapper_data("wrapper_data");
// Builds the stream metadata array for this file: ten entries, inserted in
// the order below. "unread_bytes", "timed_out" and "blocked" are fixed
// values (0, false, true); the remaining entries come from this object
// (class name, stream type, open mode, seekable(), name, eof(), wrapper
// metadata).
// NOTE(review): the key set looks modelled on PHP's stream_get_meta_data()
// -- confirm against callers.
Array File::getMetaData() {
ArrayInit ret(10);
ret.set(s_wrapper_type, o_getClassName());
ret.set(s_stream_type, getStreamType());
ret.set(s_mode, String(m_mode));
ret.set(s_unread_bytes, 0);
ret.set(s_seekable, seekable());
ret.set(s_uri, String(m_name));
ret.set(s_timed_out, false);
ret.set(s_blocked, true);
// eof() is evaluated here; File's own eof() in this file throws.
ret.set(s_eof, eof());
ret.set(s_wrapper_data, getWrapperMetaData());
return ret.create();
}
///////////////////////////////////////////////////////////////////////////////
// utility functions
/**
 * Reads one line, including its terminating character, and returns it.
 *
 * A line ends at '\n' (unix, or the '\n' of a "\r\n" pair) or at a lone '\r'
 * (mac). When `maxlen` is positive, at most `maxlen` bytes are returned.
 * Reading stops early at end of file or when a refill returns no data; a
 * null String is returned when nothing at all could be read. The logical
 * position advances by the number of bytes consumed.
 */
String File::readLine(int64_t maxlen /* = 0 */) {
  size_t capacity = 0;
  size_t lineLen = 0;
  char *line = nullptr;

  for (;;) {
    int64_t buffered = m_writepos - m_readpos;

    if (buffered <= 0) {
      // Nothing buffered: stop at EOF, otherwise refill the read buffer.
      if (eof()) {
        break;
      }
      if (m_buffer == nullptr) {
        m_buffer = (char *)malloc(CHUNK_SIZE);
      }
      m_writepos = readImpl(m_buffer, CHUNK_SIZE);
      m_readpos = 0;
      if (m_writepos - m_readpos == 0) {
        break;
      }
      continue;
    }

    char *src = m_buffer + m_readpos;
    const char *cr = (const char *)memchr(src, '\r', buffered);
    const char *lf = (const char *)memchr(src, '\n', buffered);

    const char *eol;
    if (cr && lf != cr + 1 && !(lf && lf < cr)) {
      /* mac */
      eol = cr;
    } else if (lf) {
      /* dos or unix endings */
      eol = lf;
    } else {
      eol = cr;
    }

    bool finished = (eol != nullptr);
    int64_t take = finished ? (eol - src + 1) : buffered;
    if (maxlen > 0 && maxlen <= take) {
      take = maxlen;
      finished = true;
    }

    // One spare byte per piece leaves room for the final terminator.
    capacity += take + 1;
    line = (char *)(line ? realloc(line, capacity) : malloc(capacity));
    memcpy(line + lineLen, src, take);

    m_position += take;
    m_readpos += take;
    maxlen -= take;
    lineLen += take;

    if (finished) {
      break;
    }
  }

  if (lineLen == 0) {
    assert(line == nullptr);
    return String();
  }
  line[lineLen] = '\0';
  return String(line, lineLen, AttachString);
}
/**
 * Reads one record terminated by `delimiter` (the delimiter itself is
 * consumed but not returned).
 *
 * `maxlen` is clamped to (0, CHUNK_SIZE]; a non-positive or oversized value
 * means CHUNK_SIZE. With an empty delimiter, or when the delimiter is not
 * found in the buffered data, up to `maxlen` bytes are returned. Returns
 * empty_string when the stream is at EOF with nothing buffered.
 *
 * This function works on a read buffer of CHUNK_SIZE * 3 bytes, whereas
 * read() and readLine() allocate only CHUNK_SIZE bytes for the same
 * m_buffer. The buffer is therefore (re)sized to CHUNK_SIZE * 3 on every
 * call: no buffer size is tracked in this file, and resizing to the size the
 * buffer already has is cheap. Without this, a readRecord() following a
 * read()/readLine() could write past the end of the smaller allocation.
 */
String File::readRecord(CStrRef delimiter, int64_t maxlen /* = 0 */) {
  if (eof() && m_writepos == m_readpos) {
    return empty_string;
  }
  if (maxlen <= 0 || maxlen > CHUNK_SIZE) {
    maxlen = CHUNK_SIZE;
  }
  int64_t avail = m_writepos - m_readpos;

  // realloc(nullptr, n) behaves like malloc(n); an existing smaller buffer
  // is grown with its contents preserved.
  char *grown = (char *)realloc(m_buffer, CHUNK_SIZE * 3);
  if (grown == nullptr) {
    // Out of memory: the old buffer (if any) is still valid and untouched.
    return empty_string;
  }
  m_buffer = grown;

  if (avail < maxlen && !eof()) {
    assert(m_writepos + maxlen - avail <= CHUNK_SIZE * 3);
    m_writepos += readImpl(m_buffer + m_writepos, maxlen - avail);
    maxlen = m_writepos - m_readpos;
  }
  if (m_readpos >= CHUNK_SIZE) {
    // Compact: slide the unread bytes to the front of the buffer. memmove
    // is used because source and destination share one allocation.
    memmove(m_buffer, m_buffer + m_readpos, m_writepos - m_readpos);
    m_writepos -= m_readpos;
    m_readpos = 0;
  }

  int64_t toread;
  const char *e = nullptr;
  bool skip = false;
  if (delimiter.empty()) {
    toread = maxlen;
  } else {
    if (delimiter.size() == 1) {
      e = (const char *)memchr(m_buffer + m_readpos, delimiter.charAt(0),
                               m_writepos - m_readpos);
    } else {
      int64_t pos = string_find(m_buffer + m_readpos, m_writepos - m_readpos,
                                delimiter.data(), delimiter.size(), 0, true);
      if (pos >= 0) {
        e = m_buffer + m_readpos + pos;
      } else {
        e = nullptr;
      }
    }
    if (!e) {
      toread = maxlen;
    } else {
      toread = e - m_buffer - m_readpos;
      skip = true;
    }
  }
  if (toread > maxlen && maxlen > 0) {
    toread = maxlen;
  }
  if (toread >= 0) {
    String s = String(toread, ReserveString);
    char *buf = s.mutableSlice().ptr;
    if (toread) {
      memcpy(buf, m_buffer + m_readpos, toread);
    }
    m_readpos += toread;
    if (skip) {
      // Step over the delimiter so the next record starts after it.
      m_readpos += delimiter.size();
      m_position += delimiter.size();
    }
    return s.setSize(toread);
  }
  return empty_string;
}
/**
 * Copies the rest of the stream to the output via g_context->write() and
 * returns the number of bytes copied.
 *
 * Data is pulled straight from readImpl() in 1024-byte pieces; the read
 * buffer (m_buffer) and the logical position are not touched. Copying stops
 * when readImpl() returns zero or a negative value, so that a failed read is
 * neither added to the total nor forwarded as a length -- the same
 * convention seek() uses for readImpl() results.
 */
int64_t File::print() {
  int64_t total = 0;
  while (true) {
    char buffer[1024];
    int64_t len = readImpl(buffer, 1024);
    if (len <= 0) break;
    total += len;
    g_context->write(buffer, len);
  }
  return total;
}
/**
 * Formats `args` according to `format` with string_printf() and writes the
 * result to this file. Returns what write() returns. The formatted buffer is
 * handed over to the String (AttachString), which takes ownership of it.
 */
int64_t File::printf(CStrRef format, CArrRef args) {
  int formattedLen = 0;
  char *formatted = string_printf(format.data(), format.size(), args,
                                  &formattedLen);
  String text(formatted, formattedLen, AttachString);
  return write(text);
}
///////////////////////////////////////////////////////////////////////////////
// csv functions
/**
 * Serializes `fields` as one CSV line (terminated by '\n') and writes it.
 *
 * A field is wrapped in `enclosure_char` when it contains the delimiter, the
 * enclosure, a backslash, a newline, a carriage return, a tab or a space.
 * Inside an enclosed field every enclosure character is doubled unless it
 * directly follows a backslash. Returns what write() returns.
 */
int64_t File::writeCSV(CArrRef fields, char delimiter_char /* = ',' */,
                       char enclosure_char /* = '"' */) {
  const char escape_char = '\\';
  const int fieldCount = fields.size();
  int emitted = 0;
  StringBuffer out(1024);

  for (ArrayIter it(fields); it; ++it) {
    String field = it.second().toString();
    const char *begin = field.data();
    const char *end = begin + field.size();

    // Does the field contain anything that forces quoting?
    bool quote = false;
    for (const char *p = begin; p < end && !quote; ++p) {
      quote = *p == delimiter_char || *p == enclosure_char ||
              *p == escape_char ||
              *p == '\n' || *p == '\r' || *p == '\t' || *p == ' ';
    }

    if (!quote) {
      out.append(field);
    } else {
      out.append(enclosure_char);
      bool afterEscape = false;
      for (const char *p = begin; p < end; ++p) {
        if (*p == escape_char) {
          afterEscape = true;
        } else if (!afterEscape && *p == enclosure_char) {
          // Double an unescaped enclosure character.
          out.append(enclosure_char);
        } else {
          afterEscape = false;
        }
        out.append(*p);
      }
      out.append(enclosure_char);
    }

    ++emitted;
    if (emitted != fieldCount) {
      out.append(delimiter_char);
    }
  }

  out.append('\n');
  return write(out.detach());
}
/**
 * Returns a pointer to where the line terminator at the end of the `len`
 * bytes starting at `ptr` begins: a trailing "\r\n" yields ptr + len - 2, a
 * trailing '\n' or '\r' yields ptr + len - 1, and anything else yields
 * ptr + len. For len <= 0 the pointer is returned unchanged.
 */
static const char *lookup_trailing_spaces(const char *ptr, int len) {
  if (len <= 0) {
    return ptr;
  }
  const char *end = ptr + len;
  const char last = end[-1];
  if (last == '\n') {
    const bool crlf = len > 1 && end[-2] == '\r';
    return crlf ? end - 2 : end - 1;
  }
  if (last == '\r') {
    return end - 1;
  }
  return end;
}
/**
 * Reads one CSV record from the file and returns its fields as an Array.
 *
 * A line is fetched with readLine(length). Fields are separated by
 * `delimiter_char`; a field starting with `enclosure_char` may contain
 * delimiters and embedded line ends, in which case further lines are read
 * until the enclosure is closed. Inside an enclosure a doubled enclosure
 * character stands for one literal enclosure character, and the character
 * following `escape_char` is taken as-is.
 *
 * Returns an empty Array when no line could be read, and an array holding a
 * single null entry for a blank line. If the data ends inside an enclosure,
 * the fields parsed so far are returned.
 *
 * `temp` is a malloc'd workspace in which each field is assembled; it is
 * released on every exit path, including the early return taken for an
 * unterminated enclosure.
 */
Array File::readCSV(int64_t length /* = 0 */, char delimiter_char /* = ',' */,
                    char enclosure_char /* = '"' */,
                    char escape_char /* = '\\' */) {
  String line = readLine(length);
  if (line.empty()) {
    return Array();
  }
  String new_line;
  const char *buf = line.data();
  int64_t buf_len = line.size();
  char *temp, *tptr, *line_end, *limit;
  const char *bptr;
  int64_t temp_len, line_end_len;
  bool first_field = true;
  /* Now into new section that parses buf for delimiter/enclosure fields */
  /* Strip trailing space from buf, saving end of line in case required
     for enclosure field */
  bptr = buf;
  tptr = (char *)lookup_trailing_spaces(buf, buf_len);
  line_end_len = buf_len - (size_t)(tptr - buf);
  line_end = limit = tptr;
  /* reserve workspace for building each individual field */
  temp_len = buf_len;
  temp = (char *)malloc(temp_len + line_end_len + 1);
  /* Initialize return array */
  Array ret;
  /* Main loop to read CSV fields */
  /* NB this routine will return a single null entry for a blank line */
  do {
    char *comp_end;
    const char *hunk_begin;
    tptr = temp;
    /* 1. Strip any leading space */
    for (; bptr < limit; ++bptr) {
      if (!isspace((int)*(unsigned char *)bptr) || *bptr == delimiter_char) {
        break;
      }
    }
    if (first_field && bptr == line_end) {
      ret.append(null_variant);
      break;
    }
    first_field = false;
    /* 2. Read field, leaving bptr pointing at start of next field */
    if (bptr < limit && *bptr == enclosure_char) {
      /* state: 0 = normal, 1 = previous char was the escape char,
       *        2 = previous char was an enclosure char */
      int state = 0;
      bptr++; /* move on to first character in field */
      hunk_begin = bptr;
      /* 2A. handle enclosure delimited field */
      /* inc_len: 1 while characters remain on the current line, 0 once
       * the end of the line has been reached */
      int inc_len = 1;
      for (;;) {
        switch (inc_len) {
          case 0:
            switch (state) {
              case 2:
                memcpy(tptr, hunk_begin, bptr - hunk_begin - 1);
                tptr += (bptr - hunk_begin - 1);
                hunk_begin = bptr;
                goto quit_loop_2;
              case 1:
                memcpy(tptr, hunk_begin, bptr - hunk_begin);
                tptr += (bptr - hunk_begin);
                hunk_begin = bptr;
                /* break is omitted intentionally */
              case 0:
                {
                  if (hunk_begin != line_end) {
                    memcpy(tptr, hunk_begin, bptr - hunk_begin);
                    tptr += (bptr - hunk_begin);
                    hunk_begin = bptr;
                  }
                  /* add the embedded line end to the field */
                  memcpy(tptr, line_end, line_end_len);
                  tptr += line_end_len;
                  new_line = readLine(length);
                  const char *new_buf = new_line.data();
                  int64_t new_len = new_line.size();
                  if (new_len == 0) {
                    /* we've got an unterminated enclosure,
                     * assign all the data from the start of
                     * the enclosure to end of data to the
                     * last element */
                    if ((size_t)temp_len > (size_t)(limit - buf)) {
                      goto quit_loop_2;
                    }
                    /* release the field workspace before bailing out */
                    free(temp);
                    return ret;
                  }
                  temp_len += new_len;
                  char *new_temp = (char*)realloc(temp, temp_len);
                  /* keep the write cursor at the same offset in the
                   * (possibly moved) workspace */
                  tptr = new_temp + (size_t)(tptr - temp);
                  temp = new_temp;
                  buf_len = new_len;
                  bptr = buf = new_buf;
                  hunk_begin = buf;
                  line_end = limit = (char *)lookup_trailing_spaces(buf, buf_len);
                  line_end_len = buf_len - (size_t)(limit - buf);
                  state = 0;
                }
                break;
            }
            break;
          case 1:
            /* we need to determine if the enclosure is
             * 'real' or is it escaped */
            switch (state) {
              case 1: /* escaped */
                bptr++;
                state = 0;
                break;
              case 2: /* embedded enclosure ? let's check it */
                if (*bptr != enclosure_char) {
                  /* real enclosure */
                  memcpy(tptr, hunk_begin, bptr - hunk_begin - 1);
                  tptr += (bptr - hunk_begin - 1);
                  hunk_begin = bptr;
                  goto quit_loop_2;
                }
                memcpy(tptr, hunk_begin, bptr - hunk_begin);
                tptr += (bptr - hunk_begin);
                bptr++;
                hunk_begin = bptr;
                state = 0;
                break;
              default:
                if (*bptr == escape_char) {
                  state = 1;
                } else if (*bptr == enclosure_char) {
                  state = 2;
                }
                bptr++;
                break;
            }
            break;
        }
        inc_len = (bptr < limit ? 1 : 0);
      }
    quit_loop_2:
      /* look up for a delimiter */
      for (; bptr < limit; ++bptr) {
        if (*bptr == delimiter_char) {
          break;
        }
      }
      memcpy(tptr, hunk_begin, bptr - hunk_begin);
      tptr += (bptr - hunk_begin);
      if (bptr < limit) ++bptr;
      comp_end = tptr;
    } else {
      /* 2B. Handle non-enclosure field */
      hunk_begin = bptr;
      for (; bptr < limit; ++bptr) {
        if (*bptr == delimiter_char) {
          break;
        }
      }
      memcpy(tptr, hunk_begin, bptr - hunk_begin);
      tptr += (bptr - hunk_begin);
      comp_end = (char *)lookup_trailing_spaces(temp, tptr - temp);
      if (*bptr == delimiter_char) {
        bptr++;
      }
    }
    /* 3. Now pass our field back to php */
    *comp_end = '\0';
    ret.append(String(temp, comp_end - temp, CopyString));
  } while (bptr < limit);
  free(temp);
  return ret;
}
/**
 * Returns the message Util::safe_strerror() produces for the current value
 * of errno.
 */
String File::getLastError() {
  const int lastErrno = errno;
  return Util::safe_strerror(lastErrno);
}
///////////////////////////////////////////////////////////////////////////////
}