Arquivos
hhvm/hphp/runtime/base/file.cpp
T
Paul Tarjan 15b04a6097 delegate some syscalls to the stream wrappers
Needed for phars. People are doing

  file_exists('phar://a.phar/b.php');

I didn't do all syscalls like `chmod` or `symlink`. Think I should bite the bullet, or do it piecemeal when needed?
2013-07-23 11:44:31 -07:00

779 linhas
21 KiB
C++

/*
+----------------------------------------------------------------------+
| HipHop for PHP |
+----------------------------------------------------------------------+
| Copyright (c) 2010-2013 Facebook, Inc. (http://www.facebook.com) |
| Copyright (c) 1998-2010 Zend Technologies Ltd. (http://www.zend.com) |
+----------------------------------------------------------------------+
| This source file is subject to version 2.00 of the Zend license, |
| that is bundled with this package in the file LICENSE, and is |
| available through the world-wide-web at the following url: |
| http://www.zend.com/license/2_00.txt. |
| If you did not receive a copy of the Zend license and are unable to |
| obtain it through the world-wide-web, please send a note to |
| license@zend.com so we can mail you a copy immediately. |
+----------------------------------------------------------------------+
*/
#include "hphp/runtime/base/file.h"
#include "hphp/runtime/base/complex_types.h"
#include "hphp/runtime/base/string_buffer.h"
#include "hphp/runtime/base/type_conversions.h"
#include "hphp/runtime/base/builtin_functions.h"
#include "hphp/runtime/server/static_content_cache.h"
#include "hphp/runtime/server/virtual_host.h"
#include "hphp/runtime/base/runtime_option.h"
#include "hphp/runtime/base/runtime_error.h"
#include "hphp/runtime/base/array_init.h"
#include "hphp/util/logger.h"
#include "hphp/util/process.h"
#include "hphp/util/util.h"
#include "hphp/runtime/base/zend_string.h"
#include "hphp/runtime/base/zend_printf.h"
#include "hphp/runtime/base/exceptions.h"
#include <sys/file.h>
#include "hphp/runtime/base/array_iterator.h"
#include "hphp/runtime/base/stream_wrapper_registry.h"
namespace HPHP {
///////////////////////////////////////////////////////////////////////////////
// statics
// Static string constants attached to File. Their consumers are outside this
// chunk; NOTE(review): presumably the class/resource names reported to PHP
// code -- confirm against file.h.
StaticString File::s_class_name("File");
StaticString File::s_resource_name("stream");
// Defines the request-local FileData object named s_file_data.
IMPLEMENT_REQUEST_LOCAL(FileData, s_file_data);
// Option flag value. NOTE(review): looks like the counterpart of PHP's
// FILE_USE_INCLUDE_PATH -- confirm against callers.
const int File::USE_INCLUDE_PATH = 1;
/**
 * Maps a user-supplied file name onto the path the runtime should use.
 *
 * The name is canonicalized first. After that:
 *  - with useFileCache, the normally translated path is replaced by
 *    RuntimeOption::FileCache when the file is not accessible on disk but the
 *    static content cache knows the canonical name;
 *  - with RuntimeOption::SafeFileAccess, a path prefixed by one of the
 *    allowed directories is returned as is, while any other absolute path or
 *    path still containing ".." yields an empty string;
 *  - absolute paths (or any path when keepRelative is set) are returned
 *    unchanged, everything else is prefixed with the current directory.
 */
String File::TranslatePath(CStrRef filename, bool useFileCache /* = false */,
                           bool keepRelative /*= false */) {
  String canonicalized(Util::canonicalize(filename.data(), filename.size()),
                       AttachString);

  if (useFileCache) {
    String translated = TranslatePath(canonicalized, false);
    const bool missingOnDisk =
      !translated.empty() && access(translated.data(), F_OK) < 0;
    if (missingOnDisk && StaticContentCache::TheFileCache &&
        StaticContentCache::TheFileCache->exists(canonicalized.data(),
                                                 false)) {
      // we use file cache's file name to make stat() work
      translated = String(RuntimeOption::FileCache);
    }
    return translated;
  }

  if (RuntimeOption::SafeFileAccess) {
    // upper_bound presumes the directory list is sorted -- TODO confirm in
    // VirtualHost. The entry just before the bound is the only one that can
    // be a prefix of the canonical path.
    const vector<string> &allowed = VirtualHost::GetAllowedDirectories();
    auto bound = std::upper_bound(allowed.begin(), allowed.end(),
                                  canonicalized,
                                  [](CStrRef val, const string& dir) {
                                    return strcmp(val.c_str(), dir.c_str()) < 0;
                                  });
    if (bound != allowed.begin()) {
      const string& candidate = *--bound;
      const bool isPrefix =
        candidate.size() <= canonicalized.size() &&
        !strncmp(candidate.c_str(), canonicalized.c_str(), candidate.size());
      if (isPrefix) {
        return canonicalized;
      }
    }

    // disallow access with an absolute path
    if (canonicalized.charAt(0) == '/') {
      return "";
    }

    // unresolvable paths are all considered as unsafe
    if (canonicalized.find("..") >= 0) {
      assert(canonicalized.find("..") == 0);
      return "";
    }
  }

  const bool isAbsolute = canonicalized.charAt(0) == '/';
  if (isAbsolute || keepRelative) {
    return canonicalized;
  }

  // Relative path: anchor it at the current working directory, inserting a
  // separator only when the directory does not already end with one.
  String cwd = g_context->getCwd();
  const bool cwdEndsWithSlash =
    !cwd.empty() && cwd[cwd.length() - 1] == '/';
  if (cwdEndsWithSlash) {
    return cwd + canonicalized;
  }
  return cwd + "/" + canonicalized;
}
/**
 * Returns the command string that should actually be executed for cmd.
 * Currently a pass-through: the input is returned unmodified.
 */
String File::TranslateCommand(CStrRef cmd) {
  //TODO: security checking
  return cmd;
}
/**
 * Reports whether filename is a directory known to the static content file
 * cache. Always false when no file cache is installed.
 */
bool File::IsVirtualDirectory(CStrRef filename) {
  if (!StaticContentCache::TheFileCache) {
    return false;
  }
  if (!StaticContentCache::TheFileCache->dirExists(filename.data(), false)) {
    return false;
  }
  return true;
}
/**
 * A path is "plain" when it carries no "://" scheme separator anywhere.
 */
bool File::IsPlainFilePath(CStrRef filename) {
  const bool hasSchemeSeparator = filename.find("://") != String::npos;
  return !hasSchemeSeparator;
}
/**
 * Opens filename through the stream wrapper selected by its URI.
 *
 * @param filename  path or URI to open; also recorded as the file's name
 * @param mode      open mode, forwarded to the wrapper and recorded on the file
 * @param options   forwarded to the wrapper unchanged
 * @param context   forwarded to the wrapper unchanged
 * @return a Resource wrapping the opened File, or false when no wrapper is
 *         available or the wrapper fails to open the file
 */
Variant File::Open(CStrRef filename, CStrRef mode,
                   int options /* = 0 */,
                   CVarRef context /* = null */) {
  Stream::Wrapper *wrapper = Stream::getWrapperFromURI(filename);
  // The lookup hands back a raw pointer; treat a null result (e.g. no wrapper
  // registered for the scheme) as an ordinary open failure instead of
  // dereferencing it.
  if (wrapper == nullptr) {
    return false;
  }

  File *file = wrapper->open(filename, mode, options, context);
  if (file == nullptr) {
    return false;
  }

  // Remember how the stream was opened; getMetaData() reports both values.
  file->m_name = filename.data();
  file->m_mode = mode.data();
  return Resource(file);
}
///////////////////////////////////////////////////////////////////////////////
// constructor and destructor
/**
 * Creates a file object with no descriptor (-1), not closed, zeroed
 * read/write cursors and position, and no read buffer allocated yet.
 */
File::File(bool nonblocking)
  : m_fd(-1),
    m_closed(false),
    m_nonblocking(nonblocking),
    m_writepos(0),
    m_readpos(0),
    m_position(0),
    m_buffer(nullptr) {
}
/** Releases the read buffer through closeImpl(). */
File::~File() {
  closeImpl();
}
/**
 * Frees the read buffer, if one was allocated, and clears the pointer so a
 * second call is harmless.
 */
void File::closeImpl() {
  if (!m_buffer) {
    return;
  }
  free(m_buffer);
  m_buffer = nullptr;
}
///////////////////////////////////////////////////////////////////////////////
// default implementation of virtual functions
/**
 * Reads one byte.
 *
 * Serves the byte from the read buffer when it holds unread data, otherwise
 * asks readImpl() for a single byte.
 *
 * @return the byte as a value in 0..255, or EOF when readImpl() did not
 *         deliver exactly one byte
 */
int File::getc() {
  const bool haveBuffered = m_writepos > m_readpos;
  if (!haveBuffered) {
    char single[1];
    const int64_t got = readImpl(single, 1);
    if (got != 1) {
      return EOF;
    }
    m_position += got;
    return (int)(unsigned char)single[0];
  }

  m_position++;
  return m_buffer[m_readpos++] & 0xff;
}
/**
 * Reads up to length bytes and returns them as a string.
 *
 * Buffered bytes are handed out first; the buffer is then refilled in
 * CHUNK_SIZE pieces via readImpl() until enough data is available, eof() is
 * reported, a refill yields nothing, or (for nonblocking files) after one
 * refill. A non-positive length raises a notice and returns "".
 */
String File::read(int64_t length) {
  if (length <= 0) {
    raise_notice("Invalid length %" PRId64, length);
    return "";
  }

  String result = String(length, ReserveString);
  char *out = result.mutableSlice().ptr;
  int64_t copied = 0;

  for (int64_t buffered = m_writepos - m_readpos;
       buffered < length && !eof(); ) {
    if (m_buffer == nullptr) {
      m_buffer = (char *)malloc(CHUNK_SIZE);
    }
    // Drain whatever is still buffered before the refill overwrites it.
    if (buffered > 0) {
      memcpy(out + copied, m_buffer + m_readpos, buffered);
      copied += buffered;
      length -= buffered;
    }
    m_writepos = readImpl(m_buffer, CHUNK_SIZE);
    m_readpos = 0;
    buffered = m_writepos - m_readpos;
    if (buffered == 0 || m_nonblocking) {
      // For nonblocking mode, temporary out of data.
      break;
    }
  }

  // Hand out the tail: as much of the buffer as is still wanted.
  const int64_t remaining = m_writepos - m_readpos;
  if (remaining > 0) {
    const int64_t take = remaining > length ? length : remaining;
    memcpy(out + copied, m_buffer + m_readpos, take);
    m_readpos += take;
    copied += take;
  }

  m_position += copied;
  return result.setSize(copied);
}
/**
 * Writes data through writeImpl().
 *
 * @param data    bytes to write
 * @param length  number of bytes to write; a non-positive value or one larger
 *                than data means "all of data"
 * @return what writeImpl() reported, or 0 when there is nothing to write
 */
int64_t File::write(CStrRef data, int64_t length /* = 0 */) {
  if (seekable()) {
    // Drop the read buffer and seek by (readpos - writepos), i.e. back over
    // the bytes that were buffered but never consumed.
    const int64_t rewindBy = m_readpos - m_writepos;
    m_writepos = 0; // invalidating read buffer
    m_readpos = 0;
    seek(rewindBy, SEEK_CUR);
  }

  const bool useWholeString = length <= 0 || length > data.size();
  if (useWholeString) {
    length = data.size();
  }
  if (!length) {
    return 0;
  }

  const int64_t written = writeImpl(data.data(), length);
  m_position += written;
  return written;
}
/**
 * Writes the single byte c through writeImpl() and advances the position by
 * the value writeImpl() returned, which is also the return value.
 */
int File::putc(char c) {
  char single[1] = { c };
  const int written = writeImpl(single, 1);
  m_position += written;
  return written;
}
/**
 * Default seek: forward-only, relative to the current position.
 *
 * Skips offset bytes by first consuming the read buffer and then reading and
 * discarding the rest through readImpl().
 *
 * @return true once offset bytes were skipped, false if readImpl() returned
 *         <= 0 before that
 * @throws NotSupportedException for any whence other than SEEK_CUR or for a
 *         negative offset
 */
bool File::seek(int64_t offset, int whence /* = SEEK_SET */) {
  if (whence != SEEK_CUR) {
    throw NotSupportedException(__func__, "cannot seek other than SEEK_CUR");
  }
  if (offset < 0) {
    throw NotSupportedException(__func__, "cannot seek backwards");
  }
  if (offset == 0) {
    return true;
  }

  const int64_t buffered = m_writepos - m_readpos;
  assert(buffered >= 0);
  if (buffered >= offset) {
    // The whole skip fits inside the read buffer.
    m_readpos += offset;
    return true;
  }
  if (buffered > 0) {
    m_readpos += buffered;
    offset -= buffered;
  }

  // Read and throw away the remainder in pieces of at most 1024 bytes.
  char scratch[1024];
  while (offset) {
    int64_t step = (int64_t)sizeof(scratch);
    if (offset <= step) {
      step = offset;
    }
    const int64_t got = readImpl(scratch, step);
    if (got <= 0) {
      return false;
    }
    offset -= got;
  }
  return true;
}
/** Default implementation: position reporting is unsupported, always throws. */
int64_t File::tell() {
  throw NotSupportedException(__func__, "cannot tell");
}
/** Default implementation: end-of-file testing is unsupported, always throws. */
bool File::eof() {
  throw NotSupportedException(__func__, "cannot test eof");
}
/** Default implementation: rewinding is unsupported, always throws. */
bool File::rewind() {
  throw NotSupportedException(__func__, "cannot rewind");
}
/** Default implementation: nothing to flush, so it reports success. */
bool File::flush() {
  return true;
}
/** Default implementation: truncation is unsupported, always throws. */
bool File::truncate(int64_t size) {
  throw NotSupportedException(__func__, "cannot truncate");
}
/**
 * Convenience overload of lock() for callers that do not care whether the
 * failure was "would block"; that flag is collected and dropped.
 */
bool File::lock(int operation) {
  bool ignoredWouldBlock = false;
  return lock(operation, ignoredWouldBlock);
}
/**
 * Applies flock(operation) to the file descriptor.
 *
 * @param operation   passed to flock() unchanged
 * @param wouldblock  set to true only when flock() failed with EWOULDBLOCK,
 *                    false in every other case
 * @return true when flock() succeeded
 */
bool File::lock(int operation, bool &wouldblock /* = false */) {
  assert(m_fd >= 0);
  wouldblock = false;

  const bool failed = flock(m_fd, operation) != 0;
  if (!failed) {
    return true;
  }
  if (errno == EWOULDBLOCK) {
    wouldblock = true;
  }
  return false;
}
// Keys of the array built by File::getMetaData(), one constant per entry.
const StaticString
s_wrapper_type("wrapper_type"),
s_stream_type("stream_type"),
s_mode("mode"),
s_unread_bytes("unread_bytes"),
s_seekable("seekable"),
s_uri("uri"),
s_timed_out("timed_out"),
s_blocked("blocked"),
s_eof("eof"),
s_wrapper_data("wrapper_data");
/**
 * Builds the stream metadata array for this file.
 *
 * Ten entries are produced, in this order: wrapper_type, stream_type, mode,
 * unread_bytes, seekable, uri, timed_out, blocked, eof, wrapper_data.
 * unread_bytes, timed_out and blocked are fixed at 0, false and true here;
 * the rest come from the object's state and virtual hooks.
 */
Array File::getMetaData() {
  ArrayInit meta(10);

  // Identity of the stream.
  meta.set(s_wrapper_type, o_getClassName());
  meta.set(s_stream_type, getStreamType());
  meta.set(s_mode, String(m_mode));

  // Buffer/position related information.
  meta.set(s_unread_bytes, 0);
  meta.set(s_seekable, seekable());
  meta.set(s_uri, String(m_name));

  // Status flags.
  meta.set(s_timed_out, false);
  meta.set(s_blocked, true);
  meta.set(s_eof, eof());

  meta.set(s_wrapper_data, getWrapperMetaData());
  return meta.create();
}
///////////////////////////////////////////////////////////////////////////////
// utility functions
/**
 * Reads one line, including its line terminator, from the stream.
 *
 * A line ends at the first '\n', at a "\r\n" pair, or at a lone '\r' found in
 * the buffered data. The read buffer is refilled in CHUNK_SIZE pieces through
 * readImpl() as needed.
 *
 * @param maxlen  when positive, the line is cut after this many bytes;
 *                otherwise the length is unlimited
 * @return the line, or a null String when no byte could be read
 */
String File::readLine(int64_t maxlen /* = 0 */) {
  size_t capacity = 0;
  size_t lineLen = 0;
  char *line = nullptr;

  while (true) {
    const int64_t avail = m_writepos - m_readpos;

    if (avail <= 0) {
      // Buffer exhausted: stop at end of stream, otherwise refill.
      if (eof()) {
        break;
      }
      if (m_buffer == nullptr) {
        m_buffer = (char *)malloc(CHUNK_SIZE);
      }
      m_writepos = readImpl(m_buffer, CHUNK_SIZE);
      m_readpos = 0;
      if (m_writepos - m_readpos == 0) {
        break;
      }
      continue;
    }

    char *src = m_buffer + m_readpos;
    const char *cr = (const char *)memchr(src, '\r', avail);
    const char *lf = (const char *)memchr(src, '\n', avail);

    const char *eol;
    if (cr && lf != cr + 1 && !(lf && lf < cr)) {
      /* mac */
      eol = cr;
    } else if (lf) {
      /* dos or unix endings */
      eol = lf;
    } else {
      eol = cr;
    }

    // Take everything up to and including the terminator, or the whole
    // buffer when no terminator is in sight yet.
    bool finished = (eol != nullptr);
    int64_t take = avail;
    if (finished) {
      take = eol - src + 1;
    }
    if (maxlen > 0 && maxlen <= take) {
      take = maxlen;
      finished = true;
    }

    // realloc(nullptr, n) acts as malloc(n), so one call covers the first
    // allocation as well as every later growth.
    capacity += take + 1;
    line = (char *)realloc(line, capacity);
    memcpy(line + lineLen, src, take);

    m_position += take;
    m_readpos += take;
    maxlen -= take;
    lineLen += take;

    if (finished) {
      break;
    }
  }

  if (lineLen == 0) {
    assert(line == nullptr);
    return String();
  }

  line[lineLen] = '\0';
  return String(line, lineLen, AttachString);
}
/**
 * Reads one record: the bytes up to (not including) the next delimiter, or up
 * to maxlen bytes when no delimiter is found within the buffered data.
 *
 * @param delimiter  record separator; when empty, a maxlen-sized piece is
 *                   returned
 * @param maxlen     upper bound for the record size; non-positive values and
 *                   values above CHUNK_SIZE mean CHUNK_SIZE
 * @return the record without its delimiter; empty_string at end of stream or
 *         when the working buffer cannot be allocated
 *
 * A delimiter that directly follows the returned bytes is consumed as well.
 * m_position advances by every byte consumed from the buffer.
 */
String File::readRecord(CStrRef delimiter, int64_t maxlen /* = 0 */) {
  if (eof() && m_writepos == m_readpos) {
    return empty_string;
  }
  if (maxlen <= 0 || maxlen > CHUNK_SIZE) {
    maxlen = CHUNK_SIZE;
  }

  // read() and readLine() allocate the shared buffer with only CHUNK_SIZE
  // bytes, while this function appends behind existing data and needs room
  // for CHUNK_SIZE * 3. The current capacity is not tracked, so always ask
  // realloc for the larger size: it keeps the contents, and doubles as the
  // first allocation when m_buffer is still null.
  char *grown = (char *)realloc(m_buffer, CHUNK_SIZE * 3);
  if (grown == nullptr) {
    // m_buffer is untouched on failure; report "no record" without
    // disturbing the stream state.
    return empty_string;
  }
  m_buffer = grown;

  // Compact before refilling so the append below starts as low as possible.
  // memmove because source and destination live in the same buffer.
  if (m_readpos >= CHUNK_SIZE) {
    memmove(m_buffer, m_buffer + m_readpos, m_writepos - m_readpos);
    m_writepos -= m_readpos;
    m_readpos = 0;
  }

  int64_t avail = m_writepos - m_readpos;
  if (avail < maxlen && !eof()) {
    assert(m_writepos + maxlen - avail <= CHUNK_SIZE * 3);
    const int64_t got = readImpl(m_buffer + m_writepos, maxlen - avail);
    // seek() treats a non-positive readImpl() result as "nothing read";
    // do the same here so an error value cannot pull m_writepos backwards.
    if (got > 0) {
      m_writepos += got;
    }
    avail = m_writepos - m_readpos;
  }
  // Never hand out more than is actually buffered, including the case where
  // eof() prevented the refill above.
  if (maxlen > avail) {
    maxlen = avail;
  }

  int64_t toread;
  const char *e = nullptr;
  bool skip = false;
  if (delimiter.empty()) {
    toread = maxlen;
  } else {
    if (delimiter.size() == 1) {
      e = (const char *)memchr(m_buffer + m_readpos, delimiter.charAt(0),
                               m_writepos - m_readpos);
    } else {
      int64_t pos = string_find(m_buffer + m_readpos, m_writepos - m_readpos,
                                delimiter.data(), delimiter.size(), 0, true);
      if (pos >= 0) {
        e = m_buffer + m_readpos + pos;
      } else {
        e = nullptr;
      }
    }
    if (!e) {
      toread = maxlen;
    } else {
      toread = e - m_buffer - m_readpos;
      skip = true;
    }
  }

  if (toread > maxlen && maxlen > 0) {
    // The delimiter lies beyond the size limit: return a truncated record
    // and leave the rest (delimiter included) for the next call. Skipping
    // delimiter.size() bytes here would discard real data.
    toread = maxlen;
    skip = false;
  }

  if (toread >= 0) {
    String s = String(toread, ReserveString);
    char *buf = s.mutableSlice().ptr;
    if (toread) {
      memcpy(buf, m_buffer + m_readpos, toread);
    }
    // Account for the payload the same way read()/readLine()/getc() do.
    m_readpos += toread;
    m_position += toread;
    if (skip) {
      m_readpos += delimiter.size();
      m_position += delimiter.size();
    }
    return s.setSize(toread);
  }
  return empty_string;
}
/**
 * Copies the rest of the stream to the output via g_context->write().
 *
 * Data is pulled straight from readImpl() in pieces of up to 1024 bytes;
 * bytes already sitting in the read buffer are not included.
 *
 * @return the number of bytes passed to the output
 */
int64_t File::print() {
  int64_t total = 0;
  while (true) {
    char buffer[1024];
    int64_t len = readImpl(buffer, 1024);
    // Stop on end of data and on any non-positive result, matching seek().
    // Checking only for 0 would loop forever on a negative return and hand
    // a negative length to write().
    if (len <= 0) break;
    total += len;
    g_context->write(buffer, len);
  }
  return total;
}
/**
 * Formats args according to format with string_printf() and writes the
 * result to the stream.
 *
 * @return the value returned by write()
 */
int64_t File::printf(CStrRef format, CArrRef args) {
  int formattedLen = 0;
  char *formatted =
    string_printf(format.data(), format.size(), args, &formattedLen);
  // The String attaches to (takes over) the formatted buffer.
  return write(String(formatted, formattedLen, AttachString));
}
///////////////////////////////////////////////////////////////////////////////
// csv functions
/**
 * Writes the values of fields as one CSV line terminated by '\n'.
 *
 * A value is wrapped in enclosure_char when it contains the delimiter, the
 * enclosure, a backslash, '\n', '\r', '\t' or a space. Inside a wrapped
 * value an enclosure character is doubled unless the preceding character
 * sequence left the "escaped" state set by a backslash.
 *
 * @return the value returned by write() for the assembled line
 */
int64_t File::writeCSV(CArrRef fields, char delimiter_char /* = ',' */,
                       char enclosure_char /* = '"' */) {
  const char escape_char = '\\';
  const int total = fields.size();
  int emitted = 0;
  StringBuffer out(1024);

  for (ArrayIter it(fields); it; ++it) {
    String field = it.second().toString();
    const char *begin = field.data();
    const char *end = begin + field.size();

    // Does any character force the value to be enclosed?
    bool quote = false;
    for (const char *p = begin; p < end && !quote; ++p) {
      const char c = *p;
      quote = c == delimiter_char || c == enclosure_char ||
              c == escape_char || c == '\n' || c == '\r' || c == '\t' ||
              c == ' ';
    }

    if (!quote) {
      out.append(field);
    } else {
      out.append(enclosure_char);
      bool escaped = false;
      for (const char *p = begin; p < end; ++p) {
        const char c = *p;
        if (c == escape_char) {
          escaped = true;
        } else if (!escaped && c == enclosure_char) {
          // Double the enclosure character.
          out.append(enclosure_char);
        } else {
          escaped = false;
        }
        out.append(c);
      }
      out.append(enclosure_char);
    }

    // Separate values, but do not trail the last one with a delimiter.
    ++emitted;
    if (emitted != total) {
      out.append(delimiter_char);
    }
  }

  out.append('\n');
  return write(out.detach());
}
/**
 * Finds where the trailing line terminator of a buffer starts.
 *
 * @param ptr  start of the buffer
 * @param len  number of bytes in the buffer
 * @return a pointer to the final "\r\n", '\n' or '\r' when the buffer ends
 *         with one, otherwise a pointer one past the last byte; for
 *         len <= 0 the start pointer itself
 */
static const char *lookup_trailing_spaces(const char *ptr, int len) {
  if (len <= 0) {
    return ptr;
  }

  const char *end = ptr + len;
  const char last = end[-1];

  if (last == '\r') {
    return end - 1;
  }
  if (last == '\n') {
    const bool precededByCr = len > 1 && end[-2] == '\r';
    return precededByCr ? end - 2 : end - 1;
  }
  return end;
}
/**
 * Reads one CSV record from the stream and splits it into fields.
 *
 * @param length          forwarded to readLine() for every physical line read
 * @param delimiter_char  field separator
 * @param enclosure_char  character that opens and closes an enclosed field
 * @param escape_char     character that escapes the following character
 *                        inside an enclosed field
 * @return the fields of the record; an empty Array when no line could be
 *         read; a single null entry for a blank line
 *
 * An enclosed field may span several physical lines; further lines are
 * fetched with readLine() and the embedded line ends are kept in the field.
 * Each field is assembled in the malloc'ed scratch buffer `temp`, which is
 * released on every exit path.
 */
Array File::readCSV(int64_t length /* = 0 */, char delimiter_char /* = ',' */,
                    char enclosure_char /* = '"' */,
                    char escape_char /* = '\\' */) {
  String line = readLine(length);
  if (line.empty()) {
    return Array();
  }
  String new_line;
  const char *buf = line.data();
  int64_t buf_len = line.size();
  char *temp, *tptr, *line_end, *limit;
  const char *bptr;
  int64_t temp_len, line_end_len;
  bool first_field = true;
  /* Now into new section that parses buf for delimiter/enclosure fields */
  /* Strip trailing space from buf, saving end of line in case required
     for enclosure field */
  bptr = buf;
  tptr = (char *)lookup_trailing_spaces(buf, buf_len);
  line_end_len = buf_len - (size_t)(tptr - buf);
  line_end = limit = tptr;
  /* reserve workspace for building each individual field */
  temp_len = buf_len;
  temp = (char *)malloc(temp_len + line_end_len + 1);
  /* Initialize return array */
  Array ret;
  /* Main loop to read CSV fields */
  /* NB this routine will return a single null entry for a blank line */
  do {
    char *comp_end;
    const char *hunk_begin;
    tptr = temp;
    /* 1. Strip any leading space */
    for (; bptr < limit; ++bptr) {
      if (!isspace((int)*(unsigned char *)bptr) || *bptr == delimiter_char) {
        break;
      }
    }
    if (first_field && bptr == line_end) {
      ret.append(null_variant);
      break;
    }
    first_field = false;
    /* 2. Read field, leaving bptr pointing at start of next field */
    if (bptr < limit && *bptr == enclosure_char) {
      /* state: 0 = plain, 1 = previous char was the escape char,
       * 2 = previous char was an enclosure char */
      int state = 0;
      bptr++; /* move on to first character in field */
      hunk_begin = bptr;
      /* 2A. handle enclosure delimited field */
      /* inc_len: 1 while there is a character left on this line, 0 at the
       * end of the line */
      int inc_len = 1;
      for (;;) {
        switch (inc_len) {
          case 0:
            switch (state) {
              case 2:
                memcpy(tptr, hunk_begin, bptr - hunk_begin - 1);
                tptr += (bptr - hunk_begin - 1);
                hunk_begin = bptr;
                goto quit_loop_2;
              case 1:
                memcpy(tptr, hunk_begin, bptr - hunk_begin);
                tptr += (bptr - hunk_begin);
                hunk_begin = bptr;
                /* break is omitted intentionally */
              case 0:
                {
                  if (hunk_begin != line_end) {
                    memcpy(tptr, hunk_begin, bptr - hunk_begin);
                    tptr += (bptr - hunk_begin);
                    hunk_begin = bptr;
                  }
                  /* add the embedded line end to the field */
                  memcpy(tptr, line_end, line_end_len);
                  tptr += line_end_len;
                  new_line = readLine(length);
                  const char *new_buf = new_line.data();
                  int64_t new_len = new_line.size();
                  if (new_len == 0) {
                    /* we've got an unterminated enclosure,
                     * assign all the data from the start of
                     * the enclosure to end of data to the
                     * last element */
                    if ((size_t)temp_len > (size_t)(limit - buf)) {
                      goto quit_loop_2;
                    }
                    /* Early exit: release the scratch buffer here, since
                     * the free() at the end of the function is skipped. */
                    free(temp);
                    return ret;
                  }
                  temp_len += new_len;
                  char *new_temp = (char*)realloc(temp, temp_len);
                  tptr = new_temp + (size_t)(tptr - temp);
                  temp = new_temp;
                  buf_len = new_len;
                  bptr = buf = new_buf;
                  hunk_begin = buf;
                  line_end = limit = (char *)lookup_trailing_spaces(buf, buf_len);
                  line_end_len = buf_len - (size_t)(limit - buf);
                  state = 0;
                }
                break;
            }
            break;
          case 1:
            /* we need to determine if the enclosure is
             * 'real' or is it escaped */
            switch (state) {
              case 1: /* escaped */
                bptr++;
                state = 0;
                break;
              case 2: /* embedded enclosure ? let's check it */
                if (*bptr != enclosure_char) {
                  /* real enclosure */
                  memcpy(tptr, hunk_begin, bptr - hunk_begin - 1);
                  tptr += (bptr - hunk_begin - 1);
                  hunk_begin = bptr;
                  goto quit_loop_2;
                }
                memcpy(tptr, hunk_begin, bptr - hunk_begin);
                tptr += (bptr - hunk_begin);
                bptr++;
                hunk_begin = bptr;
                state = 0;
                break;
              default:
                if (*bptr == escape_char) {
                  state = 1;
                } else if (*bptr == enclosure_char) {
                  state = 2;
                }
                bptr++;
                break;
            }
            break;
        }
        inc_len = (bptr < limit ? 1 : 0);
      }
    quit_loop_2:
      /* look up for a delimiter */
      for (; bptr < limit; ++bptr) {
        if (*bptr == delimiter_char) {
          break;
        }
      }
      memcpy(tptr, hunk_begin, bptr - hunk_begin);
      tptr += (bptr - hunk_begin);
      if (bptr < limit) ++bptr;
      comp_end = tptr;
    } else {
      /* 2B. Handle non-enclosure field */
      hunk_begin = bptr;
      for (; bptr < limit; ++bptr) {
        if (*bptr == delimiter_char) {
          break;
        }
      }
      memcpy(tptr, hunk_begin, bptr - hunk_begin);
      tptr += (bptr - hunk_begin);
      comp_end = (char *)lookup_trailing_spaces(temp, tptr - temp);
      if (*bptr == delimiter_char) {
        bptr++;
      }
    }
    /* 3. Now pass our field back to php */
    *comp_end = '\0';
    ret.append(String(temp, comp_end - temp, CopyString));
  } while (bptr < limit);
  free(temp);
  return ret;
}
/** Returns the message Util::safe_strerror() produces for the current errno. */
String File::getLastError() {
  const int lastErrno = errno;
  return Util::safe_strerror(lastErrno);
}
///////////////////////////////////////////////////////////////////////////////
}