hhvm/hphp/runtime/base/hphp_array.h

/*
   +----------------------------------------------------------------------+
   | HipHop for PHP                                                       |
   +----------------------------------------------------------------------+
   | Copyright (c) 2010-2013 Facebook, Inc. (http://www.facebook.com)     |
   | Copyright (c) 1998-2010 Zend Technologies Ltd. (http://www.zend.com) |
   +----------------------------------------------------------------------+
   | This source file is subject to version 2.00 of the Zend license,     |
   | that is bundled with this package in the file LICENSE, and is        |
   | available through the world-wide-web at the following url:           |
   | http://www.zend.com/license/2_00.txt.                                |
   | If you did not receive a copy of the Zend license and are unable to  |
   | obtain it through the world-wide-web, please send a note to          |
   | license@zend.com so we can mail you a copy immediately.              |
   +----------------------------------------------------------------------+
*/

#ifndef incl_HPHP_HPHP_ARRAY_H_
#define incl_HPHP_HPHP_ARRAY_H_

#include "hphp/runtime/base/types.h"
#include "hphp/runtime/base/array_data.h"
#include "hphp/runtime/base/smart_allocator.h"
#include "hphp/runtime/base/complex_types.h"

namespace HPHP {
///////////////////////////////////////////////////////////////////////////////

class ArrayInit;

class HphpArray : public ArrayData {
  enum SortFlavor { IntegerSort, StringSort, GenericSort };
public:
  friend class ArrayInit;

  // Load factor scaler.  If S is the # of elements, C is the
  // power-of-2 capacity, and L=LoadScale, we grow when S > C-C/L.
  // So 2 gives 0.5 load factor, 4 gives 0.75 load factor, 8 gives
  // 0.125 load factor.  Use powers of 2 to enable shift-divide.
  static const uint LoadScale = 4;

public:
  static HphpArray* GetStaticEmptyArray() {
    return &s_theEmptyArray;
  }

private:
  enum class CopyVector {};
  HphpArray(const HphpArray& other, AllocationMode, CopyVector);

  enum class CopyGeneric {};
  HphpArray(const HphpArray& other, AllocationMode, CopyGeneric);

  // convert in-place from kVector to kHphpArray: fill in keys & hashtable
  HphpArray* vectorToGeneric();

  // Safe downcast helpers
  static HphpArray* asVector(ArrayData* ad);
  static const HphpArray* asVector(const ArrayData* ad);
  static HphpArray* asHphpArray(ArrayData* ad);
  static const HphpArray* asHphpArray(const ArrayData* ad);

public:
  // Create an empty array with enough capacity for nSize elements.
  explicit HphpArray(uint nSize);

  // Create and initialize an array with size elements, populated by
  // moving (without refcounting) and reversing vals.
  HphpArray(uint size, const TypedValue* vals); // make tuple

  virtual ~HphpArray();
  void destroyVec();
  void destroy();

  // unlike ArrayData::size(), this functions doesn't delegate
  // to the virtual vsize() functions, so its more efficient to
  // use this when you know you have an HphpArray.
  ssize_t getSize() const {
    return m_size;
  }

  // This behaves the same as iter_begin except that it assumes
  // this array is not empty and its not virtual.
  ssize_t getIterBegin() const {
    assert(!empty());
    if (LIKELY(!isTombstone(m_data[0].data.m_type))) {
      return 0;
    }
    return nextElm(m_data, 0);
  }

  // override/implement ArrayData api's

  // these using directives ensure the full set of overloaded functions
  // are visible in this class, to avoid triggering implicit conversions
  // from a CVarRef key to int64.
  using ArrayData::exists;
  using ArrayData::lval;
  using ArrayData::lvalNew;
  using ArrayData::createLvalPtr;
  using ArrayData::getLvalPtr;
  using ArrayData::set;
  using ArrayData::setRef;
  using ArrayData::add;
  using ArrayData::addLval;
  using ArrayData::remove;
  using ArrayData::nvGet;

  // implements ArrayData
  ssize_t vsize() const;
  CVarRef getValueRef(ssize_t pos) const;

  // overrides ArrayData
  bool isVectorData() const;
  ssize_t iter_begin() const;
  ssize_t iter_end() const;
  ssize_t iter_advance(ssize_t prev) const;
  ssize_t iter_rewind(ssize_t prev) const;

  // implements ArrayData
  bool exists(int64_t k) const;
  bool exists(const StringData* k) const;

  // implements ArrayData
  ArrayData* lval(int64_t k, Variant*& ret, bool copy);
  ArrayData* lval(StringData* k, Variant*& ret, bool copy);
  ArrayData* lvalNew(Variant*& ret, bool copy);

  // overrides ArrayData
  ArrayData* createLvalPtr(StringData* k, Variant*& ret, bool copy);
  ArrayData* getLvalPtr(StringData* k, Variant*& ret, bool copy);

  // implements ArrayData
  static ArrayData* SetIntVec(ArrayData*, int64_t k, CVarRef v, bool copy);
  static ArrayData* SetStrVec(ArrayData*, StringData* k, CVarRef v, bool copy);
  static ArrayData* SetInt(ArrayData*, int64_t k, CVarRef v, bool copy);
  static ArrayData* SetStr(ArrayData*, StringData* k, CVarRef v, bool copy);

  // implements ArrayData
  ArrayData* setRef(int64_t k, CVarRef v, bool copy);
  ArrayData* setRef(StringData* k, CVarRef v, bool copy);

  // overrides ArrayData
  ArrayData *add(int64_t k, CVarRef v, bool copy);
  ArrayData *add(StringData* k, CVarRef v, bool copy);
  ArrayData *addLval(int64_t k, Variant*& ret, bool copy);
  ArrayData *addLval(StringData* k, Variant*& ret, bool copy);

  // implements ArrayData
  ArrayData* remove(int64_t k, bool copy);
  ArrayData* remove(const StringData* k, bool copy);

  // overrides/implements ArrayData
  ArrayData* copy() const;
  ArrayData* copyWithStrongIterators() const;

  ArrayData* nonSmartCopy() const;
  HphpArray* copyImpl() const;
  HphpArray* copyVec() const;
  HphpArray* copyGeneric() const;

  static ArrayData* AppendVec(ArrayData*, CVarRef v, bool copy);
  static ArrayData* Append(ArrayData*, CVarRef v, bool copy);
  ArrayData* appendRef(CVarRef v, bool copy);
  ArrayData* appendWithRef(CVarRef v, bool copy);
  ArrayData* plus(const ArrayData* elems, bool copy);
  ArrayData* merge(const ArrayData* elems, bool copy);
  ArrayData* pop(Variant& value);
  ArrayData* dequeue(Variant& value);
  ArrayData* prepend(CVarRef v, bool copy);
  void renumber();
  void onSetEvalScalar();

  // overrides ArrayData
  bool validFullPos(const FullPos &fp) const;
  bool advanceFullPos(FullPos& fp);

  // END overide/implements section

  // nvGet and friends.
  // "nv" stands for non-variant. If we know the types of keys and values
  // through runtime and compile-time chicanery, we can directly call these
  // methods.

  // nvGet returns a pointer to the value if the specified key is in the
  // array, NULL otherwise.
  static TypedValue* NvGetIntVec(const ArrayData*, int64_t ki);
  static TypedValue* NvGetInt(const ArrayData*, int64_t ki);
  static TypedValue* NvGetStrVec(const ArrayData*, const StringData* k);
  static TypedValue* NvGetStr(const ArrayData*, const StringData* k);

  void nvBind(int64_t ki, const TypedValue* v) {
    updateRef(ki, tvAsCVarRef(v));
  }
  void nvBind(StringData* k, const TypedValue* v) {
    updateRef(k, tvAsCVarRef(v));
  }
  void nvAppend(const TypedValue* v) {
    nextInsertVec(tvAsCVarRef(v));
  }
  ArrayData* nvNew(TypedValue*& v, bool copy);
  static void NvGetKeyVec(const ArrayData*, TypedValue* out, ssize_t pos);
  static void NvGetKey(const ArrayData*, TypedValue* out, ssize_t pos);
  bool nvInsert(StringData* k, TypedValue *v);

  /**
   * Main helper for AddNewElemC.  The semantics are slightly different from
   * other helpers, but tuned for the opcode.  The value to set is passed by
   * value; the caller has incref'd it if necessary, and this call *moves* it
   * to its location in the array (caller must not decref).  If the value cannot
   * be stored in the array, this helper decref's it.
   */
  static ArrayData* AddNewElemC(ArrayData* a, TypedValue value);

private:
  template <typename AccessorT>
  SortFlavor preSort(const AccessorT& acc, bool checkTypes);

  void postSort(bool resetKeys);

public:
  ArrayData* escalateForSort();
  void ksort(int sort_flags, bool ascending);
  void sort(int sort_flags, bool ascending);
  void asort(int sort_flags, bool ascending);
  void uksort(CVarRef cmp_function);
  void usort(CVarRef cmp_function);
  void uasort(CVarRef cmp_function);

  // Elm's data.m_type == KindOfInvalid for deleted slots.
  static bool isTombstone(DataType t) {
    return t < KindOfUninit;
    static_assert(KindOfUninit == 0 && KindOfInvalid < 0, "");
  }

  // Array element.
  struct Elm {
    /* The key is either a string pointer or an int value, and the _count
     * field in data is used to discriminate the key type. _count = 0 means
     * int, nonzero values contain 32 bits of a string's hashcode.
     * It is critical that when we return &data to clients, that they not
     * read or write the _count field! */
    union {
      int64_t ikey;
      StringData* key;
    };
    // We store values here, but also some information local to this array:
    // data.m_aux.u_hash contains either 0 (for an int key) or a string
    // hashcode; the high bit is the int/string key descriminator.
    // data.m_type == KindOfInvalid if this is an empty slot in the
    // array (e.g. after a key is deleted).
    TypedValueAux data;
    bool hasStrKey() const {
      return data.hash() != 0;
    }
    bool hasIntKey() const {
      return data.hash() == 0;
    }
    int32_t hash() const {
      return data.hash();
    }
    void setStrKey(StringData* k, strhash_t h) {
      key = k;
      data.hash() = int32_t(h) | 0x80000000;
    }
    void setIntKey(int64_t k) {
      ikey = k;
      data.hash() = 0;
    }
  };

  struct ElmKey {
    ElmKey() {}
    ElmKey(int32_t hash, StringData* key) {
      this->hash = hash;
      this->key = key;
    }
    int32_t hash;
    union {
      StringData* key;
      int64_t ikey;
    };
  };

  // Element index, with special values < 0 used for hash tables.
  // NOTE: Unfortunately, g++ on x64 tends to generate worse machine code for
  // 32-bit ints than it does for 64-bit ints. As such, we have deliberately
  // chosen to use ssize_t in some places where ideally we *should* have used
  // ElmInd.
  typedef int32_t ElmInd;
  static const ElmInd ElmIndEmpty      = -1; // == ArrayData::invalid_index
  static const ElmInd ElmIndTombstone  = -2;

  // Use a minimum of an 4-element hash table.  Valid range: [2..32]
  static const uint32_t MinLgTableSize = 2;
  static const uint32_t SmallHashSize = 1 << MinLgTableSize;
  static const uint32_t SmallMask = SmallHashSize - 1;
  static const uint32_t SmallSize = SmallHashSize - SmallHashSize / LoadScale;

  uint32_t iterLimit() const { return m_used; }

  // Fetch a value and optional key (if keyPos != nullptr), given an
  // iterator pos.  If withref is true, copy the value with "withRef"
  // semantics, and decref the previous key before copying the key.
  // Otherwise get the value cell (unboxing), and initialize keyOut.
  template <bool withRef>
  void getArrayElm(ssize_t pos, TypedValue* out, TypedValue* keyOut) const;
  bool isTombstone(ssize_t pos) const;

private:
  // Small: Array elements and the hash table are allocated inline.
  //
  //            +--------------------+
  // this -->   | HphpArray fields   |
  //            +--------------------+
  // m_data --> | slot 0 ...         | SmallSize slots for elements.
  //            | slot SmallSize-1   |
  //            +--------------------+
  // m_hash --> |                    | 2^MinLgTableSize hash table entries.
  //            +--------------------+
  //
  // Medium: Just the hash table is allocated inline, array elements
  // are allocated from malloc.
  //
  //            +--------------------+
  // this -->   | HphpArray fields   |
  //            +--------------------+
  // m_hash --> |                    | 2^K hash table entries
  //            +--------------------+
  //
  //            +--------------------+
  // m_data --> | slot 0             | 0.75 * 2^K slots for elements.
  //            | slot 1             |
  //            | ...                |
  //            +--------------------+
  //
  // Big: Array elements and the hash table are contiguously allocated, and
  // elements are pointer aligned.
  //
  //            +--------------------+
  // m_data --> | slot 0             | 0.75 * 2^K slots for elements.
  //            | slot 1             |
  //            | ...                |
  //            +--------------------+
  // m_hash --> |                    | 2^K hash table entries.
  //            +--------------------+

  uint32_t m_used;       // Number of used elements (values or tombstones)
  uint32_t m_cap;        // Number of Elms we can use before having to grow.
  uint32_t m_tableMask;  // Bitmask used when indexing into the hash table.
  uint32_t m_hLoad;      // Hash table load (# of non-empty slots).
  int64_t  m_nextKI;     // Next integer key to use for append.
  Elm*     m_data;       // Contains elements and hash table.
  ElmInd*  m_hash;       // Hash table.
  union {
    struct {
      Elm slots[SmallSize];
      ElmInd hash[SmallHashSize];
    } m_inline_data;
    ElmInd m_inline_hash[sizeof(m_inline_data) / sizeof(ElmInd)];
  };

  ssize_t nextElm(Elm* elms, ssize_t ei) const {
    assert(ei >= -1);
    while (size_t(++ei) < m_used) {
      if (!isTombstone(elms[ei].data.m_type)) {
        return ei;
      }
    }
    return (ssize_t)ElmIndEmpty;
  }
  ssize_t prevElm(Elm* elms, ssize_t ei) const;

  // Assert a bunch of invariants about this array then return true.
  // usage:  assert(checkInvariants());
  bool checkInvariants() const;

  static void getElmKey(const Elm& e, TypedValue* out);
  ssize_t find(int64_t ki) const;
  ssize_t find(const StringData* s, strhash_t prehash) const;
  ElmInd* findForInsert(int64_t ki) const;
  ElmInd* findForInsert(const StringData* k, strhash_t prehash) const;

  ssize_t iter_advance_helper(ssize_t prev) const ATTRIBUTE_COLD;

  /**
   * findForNewInsert() CANNOT be used unless the caller can guarantee that
   * the relevant key is not already present in the array. Otherwise this can
   * put the array into a bad state; use with caution.
   */
  ElmInd* findForNewInsert(size_t h0) const;
  ElmInd* findForNewInsertLoop(size_t tableMask, size_t h0) const;

  bool nextInsert(CVarRef data);
  HphpArray* nextInsertVec(CVarRef data);
  ArrayData* nextInsertRef(CVarRef data);
  ArrayData* nextInsertWithRef(CVarRef data);
  ArrayData* addLvalImpl(int64_t ki, Variant** pDest);
  ArrayData* addLvalImpl(StringData* key, strhash_t h, Variant** pDest);
  ArrayData* addVal(int64_t ki, CVarRef data);
  ArrayData* addVal(StringData* key, CVarRef data);
  ArrayData* addValWithRef(int64_t ki, CVarRef data);
  ArrayData* addValWithRef(StringData* key, CVarRef data);

  ArrayData* update(int64_t ki, CVarRef data);
  ArrayData* update(StringData* key, CVarRef data);
  ArrayData* updateRef(int64_t ki, CVarRef data);
  ArrayData* updateRef(StringData* key, CVarRef data);

  ArrayData* erase(ElmInd* ei, bool updateNext = false);

  HphpArray* copyImpl(HphpArray* target) const;

  bool isFull() const;
  Elm* newElm(ElmInd* e, size_t h0);
  Elm* newElmGrow(size_t h0);
  Elm* allocElm(ElmInd* ei);
  Elm* allocElmFast(ElmInd* ei);
  TypedValue& allocNextElm(uint32_t i);
  void initElmInt(Elm* e, int64_t ki, CVarRef data, bool byRef=false);
  void initElmStr(Elm* e, strhash_t h, StringData* key, CVarRef data,
                  bool byRef=false);
  void newElmInt(ElmInd* ei, int64_t ki, CVarRef data,
                      bool byRef=false);
  void newElmStr(ElmInd* ei, strhash_t h, StringData* key, CVarRef data,
                      bool byRef=false);
  ElmInd* allocData(size_t maxElms, size_t tableSize);
  ElmInd* reallocData(size_t maxElms, size_t tableSize);

  /**
   * grow() increases the hash table size and the number of slots for
   * elements by a factor of 2. grow() rebuilds the hash table, but it
   * does not compact the elements.
   */
  void grow() ATTRIBUTE_COLD;
  void growVec() ATTRIBUTE_COLD;

  /**
   * compact() does not change the hash table size or the number of slots
   * for elements. compact() rebuilds the hash table and compacts the
   * elements into the slots with lower addresses.
   */
  void compact(bool renumber=false) ATTRIBUTE_COLD;

  /**
   * resize() and resizeIfNeeded() will grow or compact the array as
   * necessary to ensure that there is room for a new element and a
   * new hash entry.
   *
   * resize() assumes that the array does not have room for a new element
   * or a new hash entry. resizeIfNeeded() will first check if there is room
   * for a new element and hash entry before growing or compacting the array.
   */
  void resize();
  void resizeIfNeeded();

  // Memory allocator methods.
  DECLARE_SMART_ALLOCATION(HphpArray);
  static void ReleaseVec(ArrayData*);
  static void Release(ArrayData*);

private:
  enum EmptyMode { StaticEmptyArray };
  explicit HphpArray(EmptyMode);
  // static singleton empty array.  Not a subclass because we want a fast
  // isHphpArray implementation; HphpArray should be effectively final.
  static HphpArray s_theEmptyArray;

  void initHash(size_t tableSize);
  void initNonEmpty(const HphpArray& other);

public:
  static bool validElmInd(ssize_t /*HphpArray::ElmInd*/ ei) {
    return (ei > ssize_t(HphpArray::ElmIndEmpty));
  }

  static size_t computeTableSize(uint32_t tableMask) {
    return size_t(tableMask) + size_t(1U);
  }

  static size_t computeMaxElms(uint32_t tableMask) {
    return size_t(tableMask) - size_t(tableMask) / HphpArray::LoadScale;
  }

  static size_t computeDataSize(uint32_t tableMask) {
    return computeTableSize(tableMask) * sizeof(HphpArray::ElmInd) +
      computeMaxElms(tableMask) * sizeof(HphpArray::Elm);
  }

};

//=============================================================================

// inline for performance reasons

inline HphpArray* ArrayData::Make(uint capacity) {
  return NEW(HphpArray)(capacity);
}

inline HphpArray* ArrayData::Make(uint size, const TypedValue* data) {
  return NEW(HphpArray)(size, data);
}

// HphpArray has more than one kind, so reuse ArrayData's virtual dispatch.
inline void HphpArray::release() {
  ArrayData::release();
}

///////////////////////////////////////////////////////////////////////////////
}

#endif // incl_HPHP_HPHP_ARRAY_H_