From 508280f5666a706a3681462b2a1d7de8107fd6fb Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Fri, 29 Aug 2014 15:00:27 +0100
Subject: [PATCH 01/16] disas/libvixl: Update to upstream VIXL 1.5

Update our copy of libvixl to upstream's 1.5 release.
This includes the upstream versions of the fixes we
were carrying locally (commit ffebe899).

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1407162987-4659-1-git-send-email-peter.maydell@linaro.org
---
 disas/libvixl/README                  |   2 +-
 disas/libvixl/a64/assembler-a64.h     | 369 +++++++++++++++++++++++---
 disas/libvixl/a64/constants-a64.h     |  68 ++++-
 disas/libvixl/a64/cpu-a64.h           |  27 ++
 disas/libvixl/a64/decoder-a64.cc      |  15 +-
 disas/libvixl/a64/decoder-a64.h       |   1 +
 disas/libvixl/a64/disasm-a64.cc       |  88 ++++--
 disas/libvixl/a64/disasm-a64.h        |   2 +-
 disas/libvixl/a64/instructions-a64.cc |  25 +-
 disas/libvixl/a64/instructions-a64.h  |  10 +
 disas/libvixl/platform.h              |   8 +-
 disas/libvixl/utils.cc                |  10 +
 disas/libvixl/utils.h                 |  32 ++-
 13 files changed, 561 insertions(+), 96 deletions(-)
diff --git a/disas/libvixl/README b/disas/libvixl/README
index a0ecac3dd3..8301996e68 100644
--- a/disas/libvixl/README
+++ b/disas/libvixl/README
@@ -2,7 +2,7 @@
 The code in this directory is a subset of libvixl:
  https://github.com/armvixl/vixl
 (specifically, it is the set of files needed for disassembly only,
-taken from libvixl 1.4).
+taken from libvixl 1.5).
 Bugfixes should preferably be sent upstream initially.
 
 The disassembler does not currently support the entire A64 instruction
diff --git a/disas/libvixl/a64/assembler-a64.h b/disas/libvixl/a64/assembler-a64.h
index 1e2947b283..cc0b758dd3 100644
--- a/disas/libvixl/a64/assembler-a64.h
+++ b/disas/libvixl/a64/assembler-a64.h
@@ -28,6 +28,7 @@
 #define VIXL_A64_ASSEMBLER_A64_H_
 
 #include <list>
+#include <stack>
 
 #include "globals.h"
 #include "utils.h"
@@ -574,34 +575,107 @@ class MemOperand {
 
 class Label {
  public:
-  Label() : is_bound_(false), link_(NULL), target_(NULL) {}
+  Label() : location_(kLocationUnbound) {}
   ~Label() {
     // If the label has been linked to, it needs to be bound to a target.
     VIXL_ASSERT(!IsLinked() || IsBound());
   }
 
-  inline Instruction* link() const { return link_; }
-  inline Instruction* target() const { return target_; }
-
-  inline bool IsBound() const { return is_bound_; }
-  inline bool IsLinked() const { return link_ != NULL; }
-
-  inline void set_link(Instruction* new_link) { link_ = new_link; }
-
-  static const int kEndOfChain = 0;
+  inline bool IsBound() const { return location_ >= 0; }
+  inline bool IsLinked() const { return !links_.empty(); }
 
  private:
-  // Indicates if the label has been bound, ie its location is fixed.
-  bool is_bound_;
-  // Branches instructions branching to this label form a chained list, with
-  // their offset indicating where the next instruction is located.
-  // link_ points to the latest branch instruction generated branching to this
-  // branch.
-  // If link_ is not NULL, the label has been linked to.
-  Instruction* link_;
-  // The label location.
-  Instruction* target_;
+  // The list of linked instructions is stored in a stack-like structure. We
+  // don't use std::stack directly because it's slow for the common case where
+  // only one or two instructions refer to a label, and labels themselves are
+  // short-lived. This class behaves like std::stack, but the first few links
+  // are preallocated (configured by kPreallocatedLinks).
+  //
+  // If more than N links are required, this falls back to std::stack.
+  class LinksStack {
+   public:
+    LinksStack() : size_(0), links_extended_(NULL) {}
+    ~LinksStack() {
+      delete links_extended_;
+    }
 
+    size_t size() const {
+      return size_;
+    }
+
+    bool empty() const {
+      return size_ == 0;
+    }
+
+    void push(ptrdiff_t value) {
+      if (size_ < kPreallocatedLinks) {
+        links_[size_] = value;
+      } else {
+        if (links_extended_ == NULL) {
+          links_extended_ = new std::stack<ptrdiff_t>();
+        }
+        VIXL_ASSERT(size_ == (links_extended_->size() + kPreallocatedLinks));
+        links_extended_->push(value);
+      }
+      size_++;
+    }
+
+    ptrdiff_t top() const {
+      return (size_ <= kPreallocatedLinks) ? links_[size_ - 1]
+                                           : links_extended_->top();
+    }
+
+    void pop() {
+      size_--;
+      if (size_ >= kPreallocatedLinks) {
+        links_extended_->pop();
+        VIXL_ASSERT(size_ == (links_extended_->size() + kPreallocatedLinks));
+      }
+    }
+
+   private:
+    static const size_t kPreallocatedLinks = 4;
+
+    size_t size_;
+    ptrdiff_t links_[kPreallocatedLinks];
+    std::stack<ptrdiff_t> * links_extended_;
+  };
+
+  inline ptrdiff_t location() const { return location_; }
+
+  inline void Bind(ptrdiff_t location) {
+    // Labels can only be bound once.
+    VIXL_ASSERT(!IsBound());
+    location_ = location;
+  }
+
+  inline void AddLink(ptrdiff_t instruction) {
+    // If a label is bound, the assembler already has the information it needs
+    // to write the instruction, so there is no need to add it to links_.
+    VIXL_ASSERT(!IsBound());
+    links_.push(instruction);
+  }
+
+  inline ptrdiff_t GetAndRemoveNextLink() {
+    VIXL_ASSERT(IsLinked());
+    ptrdiff_t link = links_.top();
+    links_.pop();
+    return link;
+  }
+
+  // The offsets of the instructions that have linked to this label.
+  LinksStack links_;
+  // The label location.
+  ptrdiff_t location_;
+
+  static const ptrdiff_t kLocationUnbound = -1;
+
+  // It is not safe to copy labels, so disable the copy constructor by declaring
+  // it private (without an implementation).
+  Label(const Label&);
+
+  // The Assembler class is responsible for binding and linking labels, since
+  // the stored offsets need to be consistent with the Assembler's buffer.
   friend class Assembler;
 };
 
@@ -635,10 +709,49 @@ class Literal {
 };
 
 
+// Control whether or not position-independent code should be emitted.
+enum PositionIndependentCodeOption {
+  // All code generated will be position-independent; all branches and
+  // references to labels generated with the Label class will use PC-relative
+  // addressing.
+  PositionIndependentCode,
+
+  // Allow VIXL to generate code that refers to absolute addresses. With this
+  // option, it will not be possible to copy the code buffer and run it from a
+  // different address; code must be generated in its final location.
+  PositionDependentCode,
+
+  // Allow VIXL to assume that the bottom 12 bits of the address will be
+  // constant, but that the top 48 bits may change. This allows `adrp` to
+  // function in systems which copy code between pages, but otherwise maintain
+  // 4KB page alignment.
+  PageOffsetDependentCode
+};
+
+
+// Control how scaled- and unscaled-offset loads and stores are generated.
+enum LoadStoreScalingOption {
+  // Prefer scaled-immediate-offset instructions, but emit unscaled-offset,
+  // register-offset, pre-index or post-index instructions if necessary.
+  PreferScaledOffset,
+
+  // Prefer unscaled-immediate-offset instructions, but emit scaled-offset,
+  // register-offset, pre-index or post-index instructions if necessary.
+  PreferUnscaledOffset,
+
+  // Require scaled-immediate-offset instructions.
+  RequireScaledOffset,
+
+  // Require unscaled-immediate-offset instructions.
+  RequireUnscaledOffset
+};
+
+
 // Assembler.
 class Assembler {
  public:
-  Assembler(byte* buffer, unsigned buffer_size);
+  Assembler(byte* buffer, unsigned buffer_size,
+            PositionIndependentCodeOption pic = PositionIndependentCode);
 
   // The destructor asserts that one of the following is true:
   //  * The Assembler object has not been used.
@@ -662,12 +775,15 @@ class Assembler {
   // Label.
   // Bind a label to the current PC.
   void bind(Label* label);
-  int UpdateAndGetByteOffsetTo(Label* label);
-  inline int UpdateAndGetInstructionOffsetTo(Label* label) {
-    VIXL_ASSERT(Label::kEndOfChain == 0);
-    return UpdateAndGetByteOffsetTo(label) >> kInstructionSizeLog2;
-  }
 
+  // Return the address of a bound label.
+  template <typename T>
+  inline T GetLabelAddress(const Label * label) {
+    VIXL_ASSERT(label->IsBound());
+    VIXL_STATIC_ASSERT(sizeof(T) >= sizeof(uintptr_t));
+    VIXL_STATIC_ASSERT(sizeof(*buffer_) == 1);
+    return reinterpret_cast<T>(buffer_ + label->location());
+  }
 
   // Instruction set functions.
 
@@ -733,6 +849,12 @@ class Assembler {
   // Calculate the address of a PC offset.
   void adr(const Register& rd, int imm21);
 
+  // Calculate the page address of a label.
+  void adrp(const Register& rd, Label* label);
+
+  // Calculate the page address of a PC offset.
+  void adrp(const Register& rd, int imm21);
+
   // Data Processing instructions.
   // Add.
   void add(const Register& rd,
@@ -1112,31 +1234,76 @@ class Assembler {
 
   // Memory instructions.
   // Load integer or FP register.
-  void ldr(const CPURegister& rt, const MemOperand& src);
+  void ldr(const CPURegister& rt, const MemOperand& src,
+           LoadStoreScalingOption option = PreferScaledOffset);
 
   // Store integer or FP register.
-  void str(const CPURegister& rt, const MemOperand& dst);
+  void str(const CPURegister& rt, const MemOperand& dst,
+           LoadStoreScalingOption option = PreferScaledOffset);
 
   // Load word with sign extension.
-  void ldrsw(const Register& rt, const MemOperand& src);
+  void ldrsw(const Register& rt, const MemOperand& src,
+             LoadStoreScalingOption option = PreferScaledOffset);
 
   // Load byte.
-  void ldrb(const Register& rt, const MemOperand& src);
+  void ldrb(const Register& rt, const MemOperand& src,
+            LoadStoreScalingOption option = PreferScaledOffset);
 
   // Store byte.
-  void strb(const Register& rt, const MemOperand& dst);
+  void strb(const Register& rt, const MemOperand& dst,
+            LoadStoreScalingOption option = PreferScaledOffset);
 
   // Load byte with sign extension.
-  void ldrsb(const Register& rt, const MemOperand& src);
+  void ldrsb(const Register& rt, const MemOperand& src,
+             LoadStoreScalingOption option = PreferScaledOffset);
 
   // Load half-word.
-  void ldrh(const Register& rt, const MemOperand& src);
+  void ldrh(const Register& rt, const MemOperand& src,
+            LoadStoreScalingOption option = PreferScaledOffset);
 
   // Store half-word.
-  void strh(const Register& rt, const MemOperand& dst);
+  void strh(const Register& rt, const MemOperand& dst,
+            LoadStoreScalingOption option = PreferScaledOffset);
 
   // Load half-word with sign extension.
-  void ldrsh(const Register& rt, const MemOperand& src);
+  void ldrsh(const Register& rt, const MemOperand& src,
+             LoadStoreScalingOption option = PreferScaledOffset);
+
+  // Load integer or FP register (with unscaled offset).
+  void ldur(const CPURegister& rt, const MemOperand& src,
+            LoadStoreScalingOption option = PreferUnscaledOffset);
+
+  // Store integer or FP register (with unscaled offset).
+  void stur(const CPURegister& rt, const MemOperand& src,
+            LoadStoreScalingOption option = PreferUnscaledOffset);
+
+  // Load word with sign extension.
+  void ldursw(const Register& rt, const MemOperand& src,
+              LoadStoreScalingOption option = PreferUnscaledOffset);
+
+  // Load byte (with unscaled offset).
+  void ldurb(const Register& rt, const MemOperand& src,
+             LoadStoreScalingOption option = PreferUnscaledOffset);
+
+  // Store byte (with unscaled offset).
+  void sturb(const Register& rt, const MemOperand& dst,
+             LoadStoreScalingOption option = PreferUnscaledOffset);
+
+  // Load byte with sign extension (and unscaled offset).
+  void ldursb(const Register& rt, const MemOperand& src,
+              LoadStoreScalingOption option = PreferUnscaledOffset);
+
+  // Load half-word (with unscaled offset).
+  void ldurh(const Register& rt, const MemOperand& src,
+             LoadStoreScalingOption option = PreferUnscaledOffset);
+
+  // Store half-word (with unscaled offset).
+  void sturh(const Register& rt, const MemOperand& dst,
+             LoadStoreScalingOption option = PreferUnscaledOffset);
+
+  // Load half-word with sign extension (and unscaled offset).
+  void ldursh(const Register& rt, const MemOperand& src,
+              LoadStoreScalingOption option = PreferUnscaledOffset);
 
   // Load integer or FP register pair.
   void ldp(const CPURegister& rt, const CPURegister& rt2,
@@ -1166,6 +1333,79 @@ class Assembler {
   // Load single precision floating point literal to FP register.
   void ldr(const FPRegister& ft, float imm);
 
+  // Store exclusive byte.
+  void stxrb(const Register& rs, const Register& rt, const MemOperand& dst);
+
+  // Store exclusive half-word.
+  void stxrh(const Register& rs, const Register& rt, const MemOperand& dst);
+
+  // Store exclusive register.
+  void stxr(const Register& rs, const Register& rt, const MemOperand& dst);
+
+  // Load exclusive byte.
+  void ldxrb(const Register& rt, const MemOperand& src);
+
+  // Load exclusive half-word.
+  void ldxrh(const Register& rt, const MemOperand& src);
+
+  // Load exclusive register.
+  void ldxr(const Register& rt, const MemOperand& src);
+
+  // Store exclusive register pair.
+  void stxp(const Register& rs,
+            const Register& rt,
+            const Register& rt2,
+            const MemOperand& dst);
+
+  // Load exclusive register pair.
+  void ldxp(const Register& rt, const Register& rt2, const MemOperand& src);
+
+  // Store-release exclusive byte.
+  void stlxrb(const Register& rs, const Register& rt, const MemOperand& dst);
+
+  // Store-release exclusive half-word.
+  void stlxrh(const Register& rs, const Register& rt, const MemOperand& dst);
+
+  // Store-release exclusive register.
+  void stlxr(const Register& rs, const Register& rt, const MemOperand& dst);
+
+  // Load-acquire exclusive byte.
+  void ldaxrb(const Register& rt, const MemOperand& src);
+
+  // Load-acquire exclusive half-word.
+  void ldaxrh(const Register& rt, const MemOperand& src);
+
+  // Load-acquire exclusive register.
+  void ldaxr(const Register& rt, const MemOperand& src);
+
+  // Store-release exclusive register pair.
+  void stlxp(const Register& rs,
+             const Register& rt,
+             const Register& rt2,
+             const MemOperand& dst);
+
+  // Load-acquire exclusive register pair.
+  void ldaxp(const Register& rt, const Register& rt2, const MemOperand& src);
+
+  // Store-release byte.
+  void stlrb(const Register& rt, const MemOperand& dst);
+
+  // Store-release half-word.
+  void stlrh(const Register& rt, const MemOperand& dst);
+
+  // Store-release register.
+  void stlr(const Register& rt, const MemOperand& dst);
+
+  // Load-acquire byte.
+  void ldarb(const Register& rt, const MemOperand& src);
+
+  // Load-acquire half-word.
+  void ldarh(const Register& rt, const MemOperand& src);
+
+  // Load-acquire register.
+  void ldar(const Register& rt, const MemOperand& src);
+
+
   // Move instructions. The default shift of -1 indicates that the move
   // instruction will calculate an appropriate 16-bit immediate and left shift
   // that is equal to the 64-bit immediate argument. If an explicit left shift
@@ -1214,6 +1454,9 @@ class Assembler {
   // System hint.
   void hint(SystemHint code);
 
+  // Clear exclusive monitor.
+  void clrex(int imm4 = 0xf);
+
   // Data memory barrier.
   void dmb(BarrierDomain domain, BarrierType type);
 
@@ -1429,6 +1672,11 @@ class Assembler {
     return rt2.code() << Rt2_offset;
   }
 
+  static Instr Rs(CPURegister rs) {
+    VIXL_ASSERT(rs.code() != kSPRegInternalCode);
+    return rs.code() << Rs_offset;
+  }
+
   // These encoding functions allow the stack pointer to be encoded, and
   // disallow the zero register.
   static Instr RdSP(Register rd) {
@@ -1619,6 +1867,11 @@ class Assembler {
     return imm7 << ImmHint_offset;
   }
 
+  static Instr CRm(int imm4) {
+    VIXL_ASSERT(is_uint4(imm4));
+    return imm4 << CRm_offset;
+  }
+
   static Instr ImmBarrierDomain(int imm2) {
     VIXL_ASSERT(is_uint2(imm2));
     return imm2 << ImmBarrierDomain_offset;
@@ -1660,16 +1913,20 @@ class Assembler {
   }
 
   // Size of the code generated in bytes
-  uint64_t SizeOfCodeGenerated() const {
+  size_t SizeOfCodeGenerated() const {
     VIXL_ASSERT((pc_ >= buffer_) && (pc_ < (buffer_ + buffer_size_)));
     return pc_ - buffer_;
   }
 
   // Size of the code generated since label to the current position.
-  uint64_t SizeOfCodeGeneratedSince(Label* label) const {
+  size_t SizeOfCodeGeneratedSince(Label* label) const {
+    size_t pc_offset = SizeOfCodeGenerated();
+
     VIXL_ASSERT(label->IsBound());
-    VIXL_ASSERT((pc_ >= label->target()) && (pc_ < (buffer_ + buffer_size_)));
-    return pc_ - label->target();
+    VIXL_ASSERT(pc_offset >= static_cast<size_t>(label->location()));
+    VIXL_ASSERT(pc_offset < buffer_size_);
+
+    return pc_offset - label->location();
   }
 
 
@@ -1693,6 +1950,15 @@ class Assembler {
   void EmitLiteralPool(LiteralPoolEmitOption option = NoJumpRequired);
   size_t LiteralPoolSize();
 
+  inline PositionIndependentCodeOption pic() {
+    return pic_;
+  }
+
+  inline bool AllowPageOffsetDependentCode() {
+    return (pic() == PageOffsetDependentCode) ||
+           (pic() == PositionDependentCode);
+  }
+
  protected:
   inline const Register& AppropriateZeroRegFor(const CPURegister& reg) const {
     return reg.Is64Bits() ? xzr : wzr;
@@ -1701,7 +1967,8 @@ class Assembler {
 
   void LoadStore(const CPURegister& rt,
                  const MemOperand& addr,
-                 LoadStoreOp op);
+                 LoadStoreOp op,
+                 LoadStoreScalingOption option = PreferScaledOffset);
   static bool IsImmLSUnscaled(ptrdiff_t offset);
   static bool IsImmLSScaled(ptrdiff_t offset, LSDataSize size);
 
@@ -1717,9 +1984,9 @@ class Assembler {
                         LogicalOp op);
   static bool IsImmLogical(uint64_t value,
                            unsigned width,
-                           unsigned* n,
-                           unsigned* imm_s,
-                           unsigned* imm_r);
+                           unsigned* n = NULL,
+                           unsigned* imm_s = NULL,
+                           unsigned* imm_r = NULL);
 
   void ConditionalCompare(const Register& rn,
                           const Operand& operand,
@@ -1823,6 +2090,17 @@ class Assembler {
 
   void RecordLiteral(int64_t imm, unsigned size);
 
+  // Link the current (not-yet-emitted) instruction to the specified label, then
+  // return an offset to be encoded in the instruction. If the label is not yet
+  // bound, an offset of 0 is returned.
+  ptrdiff_t LinkAndGetByteOffsetTo(Label * label);
+  ptrdiff_t LinkAndGetInstructionOffsetTo(Label * label);
+  ptrdiff_t LinkAndGetPageOffsetTo(Label * label);
+
+  // A common implementation for the LinkAndGet<Type>OffsetTo helpers.
+  template <int element_size>
+  ptrdiff_t LinkAndGetOffsetTo(Label* label);
+
   // Emit the instruction at pc_.
   void Emit(Instr instruction) {
     VIXL_STATIC_ASSERT(sizeof(*pc_) == 1);
@@ -1864,12 +2142,15 @@ class Assembler {
   // The buffer into which code and relocation info are generated.
   Instruction* buffer_;
   // Buffer size, in bytes.
-  unsigned buffer_size_;
+  size_t buffer_size_;
   Instruction* pc_;
   std::list<Literal*> literals_;
   Instruction* next_literal_pool_check_;
   unsigned literal_pool_monitor_;
 
+  PositionIndependentCodeOption pic_;
+
+  friend class Label;
   friend class BlockLiteralPoolScope;
 
 #ifdef DEBUG
diff --git a/disas/libvixl/a64/constants-a64.h b/disas/libvixl/a64/constants-a64.h
index 99677c1be3..7a14f85f59 100644
--- a/disas/libvixl/a64/constants-a64.h
+++ b/disas/libvixl/a64/constants-a64.h
@@ -46,13 +46,13 @@ R(24) R(25) R(26) R(27) R(28) R(29) R(30) R(31)
 
 #define INSTRUCTION_FIELDS_LIST(V_)                                            \
 /* Register fields */                                                          \
-V_(Rd, 4, 0, Bits)                        /* Destination register.     */      \
-V_(Rn, 9, 5, Bits)                        /* First source register.    */      \
-V_(Rm, 20, 16, Bits)                      /* Second source register.   */      \
-V_(Ra, 14, 10, Bits)                      /* Third source register.    */      \
-V_(Rt, 4, 0, Bits)                        /* Load dest / store source. */      \
-V_(Rt2, 14, 10, Bits)                     /* Load second dest /        */      \
-                                         /* store second source.      */       \
+V_(Rd, 4, 0, Bits)                        /* Destination register.        */   \
+V_(Rn, 9, 5, Bits)                        /* First source register.       */   \
+V_(Rm, 20, 16, Bits)                      /* Second source register.      */   \
+V_(Ra, 14, 10, Bits)                      /* Third source register.       */   \
+V_(Rt, 4, 0, Bits)                        /* Load/store register.         */   \
+V_(Rt2, 14, 10, Bits)                     /* Load/store second register.  */   \
+V_(Rs, 20, 16, Bits)                      /* Exclusive access status.     */   \
 V_(PrefetchMode, 4, 0, Bits)                                                   \
                                                                                \
 /* Common bits */                                                              \
@@ -126,6 +126,13 @@ V_(SysOp1, 18, 16, Bits)                                                       \
 V_(SysOp2, 7, 5, Bits)                                                         \
 V_(CRn, 15, 12, Bits)                                                          \
 V_(CRm, 11, 8, Bits)                                                           \
+                                                                               \
+/* Load-/store-exclusive */                                                    \
+V_(LdStXLoad, 22, 22, Bits)                                                    \
+V_(LdStXNotExclusive, 23, 23, Bits)                                            \
+V_(LdStXAcquireRelease, 15, 15, Bits)                                          \
+V_(LdStXSizeLog2, 31, 30, Bits)                                                \
+V_(LdStXPair, 21, 21, Bits)                                                    \
 
 
 #define SYSTEM_REGISTER_FIELDS_LIST(V_, M_)                                    \
@@ -585,6 +592,13 @@ enum MemBarrierOp {
   ISB             = MemBarrierFixed | 0x00000040
 };
 
+enum SystemExclusiveMonitorOp {
+  SystemExclusiveMonitorFixed = 0xD503305F,
+  SystemExclusiveMonitorFMask = 0xFFFFF0FF,
+  SystemExclusiveMonitorMask  = 0xFFFFF0FF,
+  CLREX                       = SystemExclusiveMonitorFixed
+};
+
 // Any load or store.
 enum LoadStoreAnyOp {
   LoadStoreAnyFMask = 0x0a000000,
@@ -702,7 +716,7 @@ enum LoadStoreUnscaledOffsetOp {
 
 // Load/store (post, pre, offset and unsigned.)
 enum LoadStoreOp {
-  LoadStoreOpMask   = 0xC4C00000,
+  LoadStoreOpMask = 0xC4C00000,
   #define LOAD_STORE(A, B, C, D)  \
   A##B##_##C = D
   LOAD_STORE_OP_LIST(LOAD_STORE),
@@ -756,6 +770,44 @@ enum LoadStoreRegisterOffset {
   #undef LOAD_STORE_REGISTER_OFFSET
 };
 
+enum LoadStoreExclusive {
+  LoadStoreExclusiveFixed = 0x08000000,
+  LoadStoreExclusiveFMask = 0x3F000000,
+  LoadStoreExclusiveMask  = 0xFFE08000,
+  STXRB_w  = LoadStoreExclusiveFixed | 0x00000000,
+  STXRH_w  = LoadStoreExclusiveFixed | 0x40000000,
+  STXR_w   = LoadStoreExclusiveFixed | 0x80000000,
+  STXR_x   = LoadStoreExclusiveFixed | 0xC0000000,
+  LDXRB_w  = LoadStoreExclusiveFixed | 0x00400000,
+  LDXRH_w  = LoadStoreExclusiveFixed | 0x40400000,
+  LDXR_w   = LoadStoreExclusiveFixed | 0x80400000,
+  LDXR_x   = LoadStoreExclusiveFixed | 0xC0400000,
+  STXP_w   = LoadStoreExclusiveFixed | 0x80200000,
+  STXP_x   = LoadStoreExclusiveFixed | 0xC0200000,
+  LDXP_w   = LoadStoreExclusiveFixed | 0x80600000,
+  LDXP_x   = LoadStoreExclusiveFixed | 0xC0600000,
+  STLXRB_w = LoadStoreExclusiveFixed | 0x00008000,
+  STLXRH_w = LoadStoreExclusiveFixed | 0x40008000,
+  STLXR_w  = LoadStoreExclusiveFixed | 0x80008000,
+  STLXR_x  = LoadStoreExclusiveFixed | 0xC0008000,
+  LDAXRB_w = LoadStoreExclusiveFixed | 0x00408000,
+  LDAXRH_w = LoadStoreExclusiveFixed | 0x40408000,
+  LDAXR_w  = LoadStoreExclusiveFixed | 0x80408000,
+  LDAXR_x  = LoadStoreExclusiveFixed | 0xC0408000,
+  STLXP_w  = LoadStoreExclusiveFixed | 0x80208000,
+  STLXP_x  = LoadStoreExclusiveFixed | 0xC0208000,
+  LDAXP_w  = LoadStoreExclusiveFixed | 0x80608000,
+  LDAXP_x  = LoadStoreExclusiveFixed | 0xC0608000,
+  STLRB_w  = LoadStoreExclusiveFixed | 0x00808000,
+  STLRH_w  = LoadStoreExclusiveFixed | 0x40808000,
+  STLR_w   = LoadStoreExclusiveFixed | 0x80808000,
+  STLR_x   = LoadStoreExclusiveFixed | 0xC0808000,
+  LDARB_w  = LoadStoreExclusiveFixed | 0x00C08000,
+  LDARH_w  = LoadStoreExclusiveFixed | 0x40C08000,
+  LDAR_w   = LoadStoreExclusiveFixed | 0x80C08000,
+  LDAR_x   = LoadStoreExclusiveFixed | 0xC0C08000
+};
+
 // Conditional compare.
 enum ConditionalCompareOp {
   ConditionalCompareMask = 0x60000000,
diff --git a/disas/libvixl/a64/cpu-a64.h b/disas/libvixl/a64/cpu-a64.h
index dfd8f015cf..59b7974a19 100644
--- a/disas/libvixl/a64/cpu-a64.h
+++ b/disas/libvixl/a64/cpu-a64.h
@@ -28,6 +28,7 @@
 #define VIXL_CPU_A64_H
 
 #include "globals.h"
+#include "instructions-a64.h"
 
 namespace vixl {
 
@@ -42,6 +43,32 @@ class CPU {
   // safely run.
   static void EnsureIAndDCacheCoherency(void *address, size_t length);
 
+  // Handle tagged pointers.
+  template <typename T>
+  static T SetPointerTag(T pointer, uint64_t tag) {
+    VIXL_ASSERT(is_uintn(kAddressTagWidth, tag));
+
+    // Use C-style casts to get static_cast behaviour for integral types (T),
+    // and reinterpret_cast behaviour for other types.
+
+    uint64_t raw = (uint64_t)pointer;
+    VIXL_STATIC_ASSERT(sizeof(pointer) == sizeof(raw));
+
+    raw = (raw & ~kAddressTagMask) | (tag << kAddressTagOffset);
+    return (T)raw;
+  }
+
+  template <typename T>
+  static uint64_t GetPointerTag(T pointer) {
+    // Use C-style casts to get static_cast behaviour for integral types (T),
+    // and reinterpret_cast behaviour for other types.
+
+    uint64_t raw = (uint64_t)pointer;
+    VIXL_STATIC_ASSERT(sizeof(pointer) == sizeof(raw));
+
+    return (raw & kAddressTagMask) >> kAddressTagOffset;
+  }
+
  private:
   // Return the content of the cache type register.
   static uint32_t GetCacheType();
diff --git a/disas/libvixl/a64/decoder-a64.cc b/disas/libvixl/a64/decoder-a64.cc
index 8450eb3b49..5831b73024 100644
--- a/disas/libvixl/a64/decoder-a64.cc
+++ b/disas/libvixl/a64/decoder-a64.cc
@@ -171,9 +171,9 @@ void Decoder::DecodePCRelAddressing(Instruction* instr) {
 
 void Decoder::DecodeBranchSystemException(Instruction* instr) {
   VIXL_ASSERT((instr->Bits(27, 24) == 0x4) ||
-         (instr->Bits(27, 24) == 0x5) ||
-         (instr->Bits(27, 24) == 0x6) ||
-         (instr->Bits(27, 24) == 0x7) );
+              (instr->Bits(27, 24) == 0x5) ||
+              (instr->Bits(27, 24) == 0x6) ||
+              (instr->Bits(27, 24) == 0x7) );
 
   switch (instr->Bits(31, 29)) {
     case 0:
@@ -272,16 +272,15 @@ void Decoder::DecodeBranchSystemException(Instruction* instr) {
 
 void Decoder::DecodeLoadStore(Instruction* instr) {
   VIXL_ASSERT((instr->Bits(27, 24) == 0x8) ||
-         (instr->Bits(27, 24) == 0x9) ||
-         (instr->Bits(27, 24) == 0xC) ||
-         (instr->Bits(27, 24) == 0xD) );
+              (instr->Bits(27, 24) == 0x9) ||
+              (instr->Bits(27, 24) == 0xC) ||
+              (instr->Bits(27, 24) == 0xD) );
 
   if (instr->Bit(24) == 0) {
     if (instr->Bit(28) == 0) {
       if (instr->Bit(29) == 0) {
         if (instr->Bit(26) == 0) {
-          // TODO: VisitLoadStoreExclusive.
-          VisitUnimplemented(instr);
+          VisitLoadStoreExclusive(instr);
         } else {
           DecodeAdvSIMDLoadStore(instr);
         }
diff --git a/disas/libvixl/a64/decoder-a64.h b/disas/libvixl/a64/decoder-a64.h
index bbbbd81247..72c15196ce 100644
--- a/disas/libvixl/a64/decoder-a64.h
+++ b/disas/libvixl/a64/decoder-a64.h
@@ -59,6 +59,7 @@
   V(LoadStorePreIndex)              \
   V(LoadStoreRegisterOffset)        \
   V(LoadStoreUnsignedOffset)        \
+  V(LoadStoreExclusive)             \
   V(LogicalShifted)                 \
   V(AddSubShifted)                  \
   V(AddSubExtended)                 \
diff --git a/disas/libvixl/a64/disasm-a64.cc b/disas/libvixl/a64/disasm-a64.cc
index f81ce4bbb8..248ebfd436 100644
--- a/disas/libvixl/a64/disasm-a64.cc
+++ b/disas/libvixl/a64/disasm-a64.cc
@@ -24,6 +24,7 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+#include <cstdlib>
 #include "a64/disasm-a64.h"
 
 namespace vixl {
@@ -529,7 +530,7 @@ void Disassembler::VisitExtract(Instruction* instr) {
 void Disassembler::VisitPCRelAddressing(Instruction* instr) {
   switch (instr->Mask(PCRelAddressingMask)) {
     case ADR: Format(instr, "adr", "'Xd, 'AddrPCRelByte"); break;
-    // ADRP is not implemented.
+    case ADRP: Format(instr, "adrp", "'Xd, 'AddrPCRelPage"); break;
     default: Format(instr, "unimplemented", "(PCRelAddressing)");
   }
 }
@@ -943,6 +944,49 @@ void Disassembler::VisitLoadStorePairNonTemporal(Instruction* instr) {
 }
 
 
+void Disassembler::VisitLoadStoreExclusive(Instruction* instr) {
+  const char *mnemonic = "unimplemented";
+  const char *form;
+
+  switch (instr->Mask(LoadStoreExclusiveMask)) {
+    case STXRB_w: mnemonic = "stxrb"; form = "'Ws, 'Wt, ['Xns]"; break;
+    case STXRH_w: mnemonic = "stxrh"; form = "'Ws, 'Wt, ['Xns]"; break;
+    case STXR_w: mnemonic = "stxr"; form = "'Ws, 'Wt, ['Xns]"; break;
+    case STXR_x: mnemonic = "stxr"; form = "'Ws, 'Xt, ['Xns]"; break;
+    case LDXRB_w: mnemonic = "ldxrb"; form = "'Wt, ['Xns]"; break;
+    case LDXRH_w: mnemonic = "ldxrh"; form = "'Wt, ['Xns]"; break;
+    case LDXR_w: mnemonic = "ldxr"; form = "'Wt, ['Xns]"; break;
+    case LDXR_x: mnemonic = "ldxr"; form = "'Xt, ['Xns]"; break;
+    case STXP_w: mnemonic = "stxp"; form = "'Ws, 'Wt, 'Wt2, ['Xns]"; break;
+    case STXP_x: mnemonic = "stxp"; form = "'Ws, 'Xt, 'Xt2, ['Xns]"; break;
+    case LDXP_w: mnemonic = "ldxp"; form = "'Wt, 'Wt2, ['Xns]"; break;
+    case LDXP_x: mnemonic = "ldxp"; form = "'Xt, 'Xt2, ['Xns]"; break;
+    case STLXRB_w: mnemonic = "stlxrb"; form = "'Ws, 'Wt, ['Xns]"; break;
+    case STLXRH_w: mnemonic = "stlxrh"; form = "'Ws, 'Wt, ['Xns]"; break;
+    case STLXR_w: mnemonic = "stlxr"; form = "'Ws, 'Wt, ['Xns]"; break;
+    case STLXR_x: mnemonic = "stlxr"; form = "'Ws, 'Xt, ['Xns]"; break;
+    case LDAXRB_w: mnemonic = "ldaxrb"; form = "'Wt, ['Xns]"; break;
+    case LDAXRH_w: mnemonic = "ldaxrh"; form = "'Wt, ['Xns]"; break;
+    case LDAXR_w: mnemonic = "ldaxr"; form = "'Wt, ['Xns]"; break;
+    case LDAXR_x: mnemonic = "ldaxr"; form = "'Xt, ['Xns]"; break;
+    case STLXP_w: mnemonic = "stlxp"; form = "'Ws, 'Wt, 'Wt2, ['Xns]"; break;
+    case STLXP_x: mnemonic = "stlxp"; form = "'Ws, 'Xt, 'Xt2, ['Xns]"; break;
+    case LDAXP_w: mnemonic = "ldaxp"; form = "'Wt, 'Wt2, ['Xns]"; break;
+    case LDAXP_x: mnemonic = "ldaxp"; form = "'Xt, 'Xt2, ['Xns]"; break;
+    case STLRB_w: mnemonic = "stlrb"; form = "'Wt, ['Xns]"; break;
+    case STLRH_w: mnemonic = "stlrh"; form = "'Wt, ['Xns]"; break;
+    case STLR_w: mnemonic = "stlr"; form = "'Wt, ['Xns]"; break;
+    case STLR_x: mnemonic = "stlr"; form = "'Xt, ['Xns]"; break;
+    case LDARB_w: mnemonic = "ldarb"; form = "'Wt, ['Xns]"; break;
+    case LDARH_w: mnemonic = "ldarh"; form = "'Wt, ['Xns]"; break;
+    case LDAR_w: mnemonic = "ldar"; form = "'Wt, ['Xns]"; break;
+    case LDAR_x: mnemonic = "ldar"; form = "'Xt, ['Xns]"; break;
+    default: form = "(LoadStoreExclusive)";
+  }
+  Format(instr, mnemonic, form);
+}
+
+
 void Disassembler::VisitFPCompare(Instruction* instr) {
   const char *mnemonic = "unimplemented";
   const char *form = "'Fn, 'Fm";
@@ -1162,7 +1206,15 @@ void Disassembler::VisitSystem(Instruction* instr) {
   const char *mnemonic = "unimplemented";
   const char *form = "(System)";
 
-  if (instr->Mask(SystemSysRegFMask) == SystemSysRegFixed) {
+  if (instr->Mask(SystemExclusiveMonitorFMask) == SystemExclusiveMonitorFixed) {
+    switch (instr->Mask(SystemExclusiveMonitorMask)) {
+      case CLREX: {
+        mnemonic = "clrex";
+        form = (instr->CRm() == 0xf) ? NULL : "'IX";
+        break;
+      }
+    }
+  } else if (instr->Mask(SystemSysRegFMask) == SystemSysRegFixed) {
     switch (instr->Mask(SystemSysRegMask)) {
       case MRS: {
         mnemonic = "mrs";
@@ -1184,7 +1236,6 @@ void Disassembler::VisitSystem(Instruction* instr) {
       }
     }
   } else if (instr->Mask(SystemHintFMask) == SystemHintFixed) {
-    VIXL_ASSERT(instr->Mask(SystemHintMask) == HINT);
     switch (instr->ImmHint()) {
       case NOP: {
         mnemonic = "nop";
@@ -1312,6 +1363,7 @@ int Disassembler::SubstituteRegisterField(Instruction* instr,
     case 'n': reg_num = instr->Rn(); break;
     case 'm': reg_num = instr->Rm(); break;
     case 'a': reg_num = instr->Ra(); break;
+    case 's': reg_num = instr->Rs(); break;
     case 't': {
       if (format[2] == '2') {
         reg_num = instr->Rt2();
@@ -1458,6 +1510,10 @@ int Disassembler::SubstituteImmediateField(Instruction* instr,
       AppendToOutput("#0x%" PRIx64, instr->ImmException());
       return 6;
     }
+    case 'X': {  // IX - CLREX instruction.
+      AppendToOutput("#0x%" PRIx64, instr->CRm());
+      return 2;
+    }
     default: {
       VIXL_UNIMPLEMENTED();
       return 0;
@@ -1564,21 +1620,20 @@ int Disassembler::SubstituteConditionField(Instruction* instr,
 
 int Disassembler::SubstitutePCRelAddressField(Instruction* instr,
                                               const char* format) {
-  USE(format);
-  VIXL_ASSERT(strncmp(format, "AddrPCRel", 9) == 0);
+  VIXL_ASSERT((strcmp(format, "AddrPCRelByte") == 0) ||   // Used by `adr`.
+              (strcmp(format, "AddrPCRelPage") == 0));    // Used by `adrp`.
 
-  int offset = instr->ImmPCRel();
+  int64_t offset = instr->ImmPCRel();
+  Instruction * base = instr;
 
-  // Only ADR (AddrPCRelByte) is supported.
-  VIXL_ASSERT(strcmp(format, "AddrPCRelByte") == 0);
-
-  char sign = '+';
-  if (offset < 0) {
-    offset = -offset;
-    sign = '-';
+  if (format[9] == 'P') {
+    offset *= kPageSize;
+    base = AlignDown(base, kPageSize);
   }
-  VIXL_STATIC_ASSERT(sizeof(*instr) == 1);
-  AppendToOutput("#%c0x%x (addr %p)", sign, offset, instr + offset);
+
+  char sign = (offset < 0) ? '-' : '+';
+  void * target = reinterpret_cast<void *>(base + offset);
+  AppendToOutput("#%c0x%" PRIx64 " (addr %p)", sign, std::abs(offset), target);
   return 13;
 }
 
@@ -1606,7 +1661,8 @@ int Disassembler::SubstituteBranchTargetField(Instruction* instr,
     sign = '-';
   }
   VIXL_STATIC_ASSERT(sizeof(*instr) == 1);
-  AppendToOutput("#%c0x%" PRIx64 " (addr %p)", sign, offset, instr + offset);
+  void * address = reinterpret_cast<void *>(instr + offset);
+  AppendToOutput("#%c0x%" PRIx64 " (addr %p)", sign, offset, address);
   return 8;
 }
 
diff --git a/disas/libvixl/a64/disasm-a64.h b/disas/libvixl/a64/disasm-a64.h
index 3a56e15515..06ee43fdeb 100644
--- a/disas/libvixl/a64/disasm-a64.h
+++ b/disas/libvixl/a64/disasm-a64.h
@@ -85,7 +85,7 @@ class Disassembler: public DecoderVisitor {
   bool IsMovzMovnImm(unsigned reg_size, uint64_t value);
 
   void ResetOutput();
-  void AppendToOutput(const char* string, ...);
+  void AppendToOutput(const char* string, ...) PRINTF_CHECK(2, 3);
 
   char* buffer_;
   uint32_t buffer_pos_;
diff --git a/disas/libvixl/a64/instructions-a64.cc b/disas/libvixl/a64/instructions-a64.cc
index c4eb7c4518..e9caceb37b 100644
--- a/disas/libvixl/a64/instructions-a64.cc
+++ b/disas/libvixl/a64/instructions-a64.cc
@@ -149,17 +149,24 @@ LSDataSize CalcLSPairDataSize(LoadStorePairOp op) {
 
 
 Instruction* Instruction::ImmPCOffsetTarget() {
+  Instruction * base = this;
   ptrdiff_t offset;
   if (IsPCRelAddressing()) {
-    // PC-relative addressing. Only ADR is supported.
+    // ADR and ADRP.
     offset = ImmPCRel();
+    if (Mask(PCRelAddressingMask) == ADRP) {
+      base = AlignDown(base, kPageSize);
+      offset *= kPageSize;
+    } else {
+      VIXL_ASSERT(Mask(PCRelAddressingMask) == ADR);
+    }
   } else {
     // All PC-relative branches.
     VIXL_ASSERT(BranchType() != UnknownBranchType);
     // Relative branch offsets are instruction-size-aligned.
     offset = ImmBranch() << kInstructionSizeLog2;
   }
-  return this + offset;
+  return base + offset;
 }
 
 
@@ -185,10 +192,16 @@ void Instruction::SetImmPCOffsetTarget(Instruction* target) {
 
 
 void Instruction::SetPCRelImmTarget(Instruction* target) {
-  // ADRP is not supported, so 'this' must point to an ADR instruction.
-  VIXL_ASSERT(Mask(PCRelAddressingMask) == ADR);
-
-  Instr imm = Assembler::ImmPCRelAddress(target - this);
+  int32_t imm21;
+  if ((Mask(PCRelAddressingMask) == ADR)) {
+    imm21 = target - this;
+  } else {
+    VIXL_ASSERT(Mask(PCRelAddressingMask) == ADRP);
+    uintptr_t this_page = reinterpret_cast<uintptr_t>(this) / kPageSize;
+    uintptr_t target_page = reinterpret_cast<uintptr_t>(target) / kPageSize;
+    imm21 = target_page - this_page;
+  }
+  Instr imm = Assembler::ImmPCRelAddress(imm21);
 
   SetInstructionBits(Mask(~ImmPCRel_mask) | imm);
 }
diff --git a/disas/libvixl/a64/instructions-a64.h b/disas/libvixl/a64/instructions-a64.h
index a4240d7d33..d5b90c52e1 100644
--- a/disas/libvixl/a64/instructions-a64.h
+++ b/disas/libvixl/a64/instructions-a64.h
@@ -41,6 +41,10 @@ const unsigned kLiteralEntrySize = 4;
 const unsigned kLiteralEntrySizeLog2 = 2;
 const unsigned kMaxLoadLiteralRange = 1 * MBytes;
 
+// This is the nominal page size (as used by the adrp instruction); the actual
+// size of the memory pages allocated by the kernel is likely to differ.
+const unsigned kPageSize = 4 * KBytes;
+
 const unsigned kWRegSize = 32;
 const unsigned kWRegSizeLog2 = 5;
 const unsigned kWRegSizeInBytes = kWRegSize / 8;
@@ -79,6 +83,12 @@ const unsigned kZeroRegCode = 31;
 const unsigned kSPRegInternalCode = 63;
 const unsigned kRegCodeMask = 0x1f;
 
+const unsigned kAddressTagOffset = 56;
+const unsigned kAddressTagWidth = 8;
+const uint64_t kAddressTagMask =
+    ((UINT64_C(1) << kAddressTagWidth) - 1) << kAddressTagOffset;
+VIXL_STATIC_ASSERT(kAddressTagMask == UINT64_C(0xff00000000000000));
+
 // AArch64 floating-point specifics. These match IEEE-754.
 const unsigned kDoubleMantissaBits = 52;
 const unsigned kDoubleExponentBits = 11;
diff --git a/disas/libvixl/platform.h b/disas/libvixl/platform.h
index b5c2085b90..de2b110cca 100644
--- a/disas/libvixl/platform.h
+++ b/disas/libvixl/platform.h
@@ -28,14 +28,10 @@
 #define PLATFORM_H
 
 // Define platform specific functionalities.
+#include <signal.h>
 
 namespace vixl {
-#ifdef USE_SIMULATOR
-// Currently we assume running the simulator implies running on x86 hardware.
-inline void HostBreakpoint() { asm("int3"); }
-#else
-inline void HostBreakpoint() { asm("brk"); }
-#endif
+inline void HostBreakpoint() { raise(SIGINT); }
 }  // namespace vixl
 
 #endif
diff --git a/disas/libvixl/utils.cc b/disas/libvixl/utils.cc
index c9c05d1e18..4d4fcbdad4 100644
--- a/disas/libvixl/utils.cc
+++ b/disas/libvixl/utils.cc
@@ -124,4 +124,14 @@ int CountSetBits(uint64_t value, int width) {
 
   return value;
 }
+
+
+uint64_t LowestSetBit(uint64_t value) {
+  return value & -value;
+}
+
+
+bool IsPowerOf2(int64_t value) {
+  return (value != 0) && ((value & (value - 1)) == 0);
+}
 }  // namespace vixl
diff --git a/disas/libvixl/utils.h b/disas/libvixl/utils.h
index 83c928c8e3..b472f0e6cd 100644
--- a/disas/libvixl/utils.h
+++ b/disas/libvixl/utils.h
@@ -33,6 +33,14 @@
 
 namespace vixl {
 
+// Macros for compile-time format checking.
+#if defined(__GNUC__)
+#define PRINTF_CHECK(format_index, varargs_index) \
+  __attribute__((format(printf, format_index, varargs_index)))
+#else
+#define PRINTF_CHECK(format_index, varargs_index)
+#endif
+
 // Check number width.
 inline bool is_intn(unsigned n, int64_t x) {
   VIXL_ASSERT((0 < n) && (n < 64));
@@ -155,6 +163,8 @@ int CountLeadingZeros(uint64_t value, int width);
 int CountLeadingSignBits(int64_t value, int width);
 int CountTrailingZeros(uint64_t value, int width);
 int CountSetBits(uint64_t value, int width);
+uint64_t LowestSetBit(uint64_t value);
+bool IsPowerOf2(int64_t value);
 
 // Pointer alignment
 // TODO: rename/refactor to make it specific to instructions.
@@ -167,21 +177,31 @@ bool IsWordAligned(T pointer) {
 // Increment a pointer until it has the specified alignment.
 template<class T>
 T AlignUp(T pointer, size_t alignment) {
-  VIXL_STATIC_ASSERT(sizeof(pointer) == sizeof(uintptr_t));
-  uintptr_t pointer_raw = reinterpret_cast<uintptr_t>(pointer);
+  // Use C-style casts to get static_cast behaviour for integral types (T), and
+  // reinterpret_cast behaviour for other types.
+
+  uintptr_t pointer_raw = (uintptr_t)pointer;
+  VIXL_STATIC_ASSERT(sizeof(pointer) == sizeof(pointer_raw));
+
   size_t align_step = (alignment - pointer_raw) % alignment;
   VIXL_ASSERT((pointer_raw + align_step) % alignment == 0);
-  return reinterpret_cast<T>(pointer_raw + align_step);
+
+  return (T)(pointer_raw + align_step);
 }
 
 // Decrement a pointer until it has the specified alignment.
 template<class T>
 T AlignDown(T pointer, size_t alignment) {
-  VIXL_STATIC_ASSERT(sizeof(pointer) == sizeof(uintptr_t));
-  uintptr_t pointer_raw = reinterpret_cast<uintptr_t>(pointer);
+  // Use C-style casts to get static_cast behaviour for integral types (T), and
+  // reinterpret_cast behaviour for other types.
+
+  uintptr_t pointer_raw = (uintptr_t)pointer;
+  VIXL_STATIC_ASSERT(sizeof(pointer) == sizeof(pointer_raw));
+
   size_t align_step = pointer_raw % alignment;
   VIXL_ASSERT((pointer_raw - align_step) % alignment == 0);
-  return reinterpret_cast<T>(pointer_raw - align_step);
+
+  return (T)(pointer_raw - align_step);
 }
 
 

From ed1f13d607e2c64c66bea49d6f4edaf278d3d246 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Fri, 29 Aug 2014 15:00:28 +0100
Subject: [PATCH 02/16] target-arm: Fix regression that disabled VFP for ARMv5
 CPUs

Commit 2c7ffc414 added support for honouring the CPACR coprocessor
access control register bits which may disable access to VFP
and Neon instructions. However it failed to account for the
fact that the CPACR is only present starting from the ARMv6
architecture version, so it accidentally disabled VFP completely
for ARMv5 CPUs like the ARM926. Linux would detect this as
"no VFP present" and probably fall back to its own emulation,
but other guest OSes might crash or misbehave.

This fixes bug LP:1359930.

Reported-by: Jakub Jermar <jakub@jermar.eu>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1408714940-7192-1-git-send-email-peter.maydell@linaro.org
Cc: qemu-stable@nongnu.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target-arm/cpu.h | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index 8098b8d357..659b104608 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -1255,7 +1255,14 @@ static inline bool arm_singlestep_active(CPUARMState *env)
 static inline void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
                                         target_ulong *cs_base, int *flags)
 {
-    int fpen = extract32(env->cp15.c1_coproc, 20, 2);
+    int fpen;
+
+    if (arm_feature(env, ARM_FEATURE_V6)) {
+        fpen = extract32(env->cp15.c1_coproc, 20, 2);
+    } else {
+        /* CPACR doesn't exist before v6, so VFP is always accessible */
+        fpen = 3;
+    }
 
     if (is_a64(env)) {
         *pc = env->pc;

From c379621451e64cad166a60f42e1d67f0438b8d1b Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Fri, 29 Aug 2014 15:00:28 +0100
Subject: [PATCH 03/16] target-arm: Correct Cortex-A57 ISAR5 and AA64ISAR0 ID
 register values

We implement the crypto extensions but were incorrectly reporting
ID register values for the Cortex-A57 which did not advertise
crypto. Use the correct values as described in the TRM.
With this fix Linux correctly detects presence of the crypto
features and advertises them in /proc/cpuinfo.

Reported-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1408718660-7295-1-git-send-email-peter.maydell@linaro.org
Cc: qemu-stable@nongnu.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target-arm/cpu64.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/target-arm/cpu64.c b/target-arm/cpu64.c
index 38d2b8445a..aa42803959 100644
--- a/target-arm/cpu64.c
+++ b/target-arm/cpu64.c
@@ -123,9 +123,10 @@ static void aarch64_a57_initfn(Object *obj)
     cpu->id_isar2 = 0x21232042;
     cpu->id_isar3 = 0x01112131;
     cpu->id_isar4 = 0x00011142;
+    cpu->id_isar5 = 0x00011121;
     cpu->id_aa64pfr0 = 0x00002222;
     cpu->id_aa64dfr0 = 0x10305106;
-    cpu->id_aa64isar0 = 0x00010000;
+    cpu->id_aa64isar0 = 0x00011120;
     cpu->id_aa64mmfr0 = 0x00001124;
     cpu->dbgdidr = 0x3516d000;
     cpu->clidr = 0x0a200023;

From 71a62046ae7accb4fdd4b2413a77342fc5e0c554 Mon Sep 17 00:00:00 2001
From: Adam Lackorzynski <adam@os.inf.tu-dresden.de>
Date: Fri, 29 Aug 2014 15:00:28 +0100
Subject: [PATCH 04/16] arm_gic: Fix read of GICD_ICFGR

The GICD_ICFGR register covers 4 interrupts per byte.

Acked-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Adam Lackorzynski <adam@os.inf.tu-dresden.de>
Message-id: 1408372255-12358-2-git-send-email-adam@os.inf.tu-dresden.de
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 hw/intc/arm_gic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/intc/arm_gic.c b/hw/intc/arm_gic.c
index 1532ef9482..d2b1aaf485 100644
--- a/hw/intc/arm_gic.c
+++ b/hw/intc/arm_gic.c
@@ -372,7 +372,7 @@ static uint32_t gic_dist_readb(void *opaque, hwaddr offset)
         }
     } else if (offset < 0xf00) {
         /* Interrupt Configuration.  */
-        irq = (offset - 0xc00) * 2 + GIC_BASE_IRQ;
+        irq = (offset - 0xc00) * 4 + GIC_BASE_IRQ;
         if (irq >= s->num_irq)
             goto bad_reg;
         res = 0;

From 24b790df4388009c25cee311a4eff792c89cada1 Mon Sep 17 00:00:00 2001
From: Adam Lackorzynski <adam@os.inf.tu-dresden.de>
Date: Fri, 29 Aug 2014 15:00:28 +0100
Subject: [PATCH 05/16] arm_gic: GICD_ICFGR: Write model only for pre v1 GICs

Setting the model is only available in pre-v1 GIC models.

Acked-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Adam Lackorzynski <adam@os.inf.tu-dresden.de>
Message-id: 1408372255-12358-3-git-send-email-adam@os.inf.tu-dresden.de
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 hw/intc/arm_gic.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/hw/intc/arm_gic.c b/hw/intc/arm_gic.c
index d2b1aaf485..e5466470b0 100644
--- a/hw/intc/arm_gic.c
+++ b/hw/intc/arm_gic.c
@@ -561,10 +561,12 @@ static void gic_dist_writeb(void *opaque, hwaddr offset,
         if (irq < GIC_INTERNAL)
             value |= 0xaa;
         for (i = 0; i < 4; i++) {
-            if (value & (1 << (i * 2))) {
-                GIC_SET_MODEL(irq + i);
-            } else {
-                GIC_CLEAR_MODEL(irq + i);
+            if (s->revision == REV_11MPCORE || s->revision == REV_NVIC) {
+                if (value & (1 << (i * 2))) {
+                    GIC_SET_MODEL(irq + i);
+                } else {
+                    GIC_CLEAR_MODEL(irq + i);
+                }
             }
             if (value & (2 << (i * 2))) {
                 GIC_SET_EDGE_TRIGGER(irq + i);

From de7a900f0cdeeb5ebcb9d4a56cba2f0da7477001 Mon Sep 17 00:00:00 2001
From: Adam Lackorzynski <adam@os.inf.tu-dresden.de>
Date: Fri, 29 Aug 2014 15:00:28 +0100
Subject: [PATCH 06/16] arm_gic: Do not force PPIs to edge-triggered mode

Only SGIs must be WI, done by forcing them to their default
(edge-triggered).

Acked-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Adam Lackorzynski <adam@os.inf.tu-dresden.de>
Message-id: 1408372255-12358-4-git-send-email-adam@os.inf.tu-dresden.de
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 hw/intc/arm_gic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/intc/arm_gic.c b/hw/intc/arm_gic.c
index e5466470b0..55019c9742 100644
--- a/hw/intc/arm_gic.c
+++ b/hw/intc/arm_gic.c
@@ -558,7 +558,7 @@ static void gic_dist_writeb(void *opaque, hwaddr offset,
         irq = (offset - 0xc00) * 4 + GIC_BASE_IRQ;
         if (irq >= s->num_irq)
             goto bad_reg;
-        if (irq < GIC_INTERNAL)
+        if (irq < GIC_NR_SGIS)
             value |= 0xaa;
         for (i = 0; i < 4; i++) {
             if (s->revision == REV_11MPCORE || s->revision == REV_NVIC) {

From 93b5f6f1a630f8db141d7ce9030d4fc2e3fa5b14 Mon Sep 17 00:00:00 2001
From: Adam Lackorzynski <adam@os.inf.tu-dresden.de>
Date: Fri, 29 Aug 2014 15:00:29 +0100
Subject: [PATCH 07/16] arm_gic: Use GIC_NR_SGIS constant

Use constant rather than a plain number.

Acked-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Adam Lackorzynski <adam@os.inf.tu-dresden.de>
Message-id: 1408372255-12358-5-git-send-email-adam@os.inf.tu-dresden.de
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 hw/intc/arm_gic_common.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/intc/arm_gic_common.c b/hw/intc/arm_gic_common.c
index 6d884eca3b..18b01ba0c7 100644
--- a/hw/intc/arm_gic_common.c
+++ b/hw/intc/arm_gic_common.c
@@ -128,7 +128,7 @@ static void arm_gic_common_reset(DeviceState *dev)
         s->running_priority[i] = 0x100;
         s->cpu_enabled[i] = false;
     }
-    for (i = 0; i < 16; i++) {
+    for (i = 0; i < GIC_NR_SGIS; i++) {
         GIC_SET_ENABLED(i, ALL_CPU_MASK);
         GIC_SET_EDGE_TRIGGER(i);
     }

From d3579f362f61addb6b8b9c683f398d29af47eb23 Mon Sep 17 00:00:00 2001
From: Joel Schopp <joel.schopp@amd.com>
Date: Fri, 29 Aug 2014 15:00:29 +0100
Subject: [PATCH 08/16] aarch64: raise max_cpus to 8

I'm running on a system with 8 cpus and it would be nice to have qemu
support all of them.  The attached patch does that and has been tested.

That said, I'm not sure if 8 is enough or if we want to bump this even higher
now before systems with many more cpus come along. 255 anyone?

Cc: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Joel Schopp <joel.schopp@amd.com>
Message-id: 20140819213304.19537.2834.stgit@joelaarch64.amd.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 hw/arm/virt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index bd206a019a..d6fffc75bd 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -541,7 +541,7 @@ static QEMUMachine machvirt_a15_machine = {
     .name = "virt",
     .desc = "ARM Virtual Machine",
     .init = machvirt_init,
-    .max_cpus = 4,
+    .max_cpus = 8,
 };
 
 static void machvirt_machine_init(void)

From b52b81e44f7d087a7b06217eb83cd79f8bf2fb05 Mon Sep 17 00:00:00 2001
From: Sergey Fedorov <serge.fdrv@gmail.com>
Date: Fri, 29 Aug 2014 15:00:29 +0100
Subject: [PATCH 09/16] hw/intc/arm_gic: honor target mask in gic_update()

Take IRQ target mask into account when determining the highest priority
pending interrupt.

Signed-off-by: Sergey Fedorov <serge.fdrv@gmail.com>
Acked-by: Christoffer Dall <christoffer.dall@linaro.org>
Message-id: 1407947471-26981-1-git-send-email-serge.fdrv@gmail.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 hw/intc/arm_gic.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/hw/intc/arm_gic.c b/hw/intc/arm_gic.c
index 55019c9742..db9110c1c4 100644
--- a/hw/intc/arm_gic.c
+++ b/hw/intc/arm_gic.c
@@ -66,7 +66,8 @@ void gic_update(GICState *s)
         best_prio = 0x100;
         best_irq = 1023;
         for (irq = 0; irq < s->num_irq; irq++) {
-            if (GIC_TEST_ENABLED(irq, cm) && gic_test_pending(s, irq, cm)) {
+            if (GIC_TEST_ENABLED(irq, cm) && gic_test_pending(s, irq, cm) &&
+                (irq < GIC_INTERNAL || GIC_TARGET(irq) & cm)) {
                 if (GIC_GET_PRIORITY(irq, cpu) < best_prio) {
                     best_prio = GIC_GET_PRIORITY(irq, cpu);
                     best_irq = irq;

From c92c06872a092eaba64ea7e56ceff762be48b093 Mon Sep 17 00:00:00 2001
From: Alistair Francis <alistair.francis@xilinx.com>
Date: Fri, 29 Aug 2014 15:00:29 +0100
Subject: [PATCH 10/16] target-arm: Make the ARM PMCCNTR register 64-bit

This makes the PMCCNTR register 64-bit to allow for the
64-bit ARMv8 version.

Signed-off-by: Peter Crosthwaite <peter.crosthwaite@xilinx.com>
Signed-off-by: Alistair Francis <alistair.francis@xilinx.com>
Message-id: 6c5bac5fd0ea54963b1fc0e7f9464909f2e19a73.1409025949.git.peter.crosthwaite@xilinx.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target-arm/cpu.h    |  2 +-
 target-arm/helper.c | 19 +++++++++----------
 2 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index 659b104608..986c249038 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -224,7 +224,7 @@ typedef struct CPUARMState {
         /* If the counter is enabled, this stores the last time the counter
          * was reset. Otherwise it stores the counter value
          */
-        uint32_t c15_ccnt;
+        uint64_t c15_ccnt;
     } cp15;
 
     struct {
diff --git a/target-arm/helper.c b/target-arm/helper.c
index 2a77c97c7b..711ca12b12 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -551,11 +551,10 @@ static CPAccessResult pmreg_access(CPUARMState *env, const ARMCPRegInfo *ri)
 static void pmcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
                        uint64_t value)
 {
-    /* Don't computer the number of ticks in user mode */
-    uint32_t temp_ticks;
+    uint64_t temp_ticks;
 
-    temp_ticks = qemu_clock_get_us(QEMU_CLOCK_VIRTUAL) *
-                  get_ticks_per_sec() / 1000000;
+    temp_ticks = muldiv64(qemu_clock_get_us(QEMU_CLOCK_VIRTUAL),
+                          get_ticks_per_sec(), 1000000);
 
     if (env->cp15.c9_pmcr & PMCRE) {
         /* If the counter is enabled */
@@ -587,15 +586,15 @@ static void pmcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
 
 static uint64_t pmccntr_read(CPUARMState *env, const ARMCPRegInfo *ri)
 {
-    uint32_t total_ticks;
+    uint64_t total_ticks;
 
     if (!(env->cp15.c9_pmcr & PMCRE)) {
         /* Counter is disabled, do not change value */
         return env->cp15.c15_ccnt;
     }
 
-    total_ticks = qemu_clock_get_us(QEMU_CLOCK_VIRTUAL) *
-                  get_ticks_per_sec() / 1000000;
+    total_ticks = muldiv64(qemu_clock_get_us(QEMU_CLOCK_VIRTUAL),
+                           get_ticks_per_sec(), 1000000);
 
     if (env->cp15.c9_pmcr & PMCRD) {
         /* Increment once every 64 processor clock cycles */
@@ -607,7 +606,7 @@ static uint64_t pmccntr_read(CPUARMState *env, const ARMCPRegInfo *ri)
 static void pmccntr_write(CPUARMState *env, const ARMCPRegInfo *ri,
                         uint64_t value)
 {
-    uint32_t total_ticks;
+    uint64_t total_ticks;
 
     if (!(env->cp15.c9_pmcr & PMCRE)) {
         /* Counter is disabled, set the absolute value */
@@ -615,8 +614,8 @@ static void pmccntr_write(CPUARMState *env, const ARMCPRegInfo *ri,
         return;
     }
 
-    total_ticks = qemu_clock_get_us(QEMU_CLOCK_VIRTUAL) *
-                  get_ticks_per_sec() / 1000000;
+    total_ticks = muldiv64(qemu_clock_get_us(QEMU_CLOCK_VIRTUAL),
+                           get_ticks_per_sec(), 1000000);
 
     if (env->cp15.c9_pmcr & PMCRD) {
         /* Increment once every 64 processor clock cycles */

From 421c7ebd93d33a3276b78985b9e25cfea35692f0 Mon Sep 17 00:00:00 2001
From: Peter Crosthwaite <peter.crosthwaite@xilinx.com>
Date: Fri, 29 Aug 2014 15:00:29 +0100
Subject: [PATCH 11/16] arm: Implement PMCCNTR 32b read-modify-write

The register is now 64bit, however a 32 bit write to the register
should leave the higher bits unchanged. The open coded write handler
does not implement this, so we need to read-modify-write accordingly.

Signed-off-by: Peter Crosthwaite <peter.crosthwaite@xilinx.com>
Reviewed-by: Alistair Francis <alistair23@gmail.com>
Message-id: ec350573424bb2adc1701c3b9278d26598e2f2d1.1409025949.git.peter.crosthwaite@xilinx.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target-arm/helper.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/target-arm/helper.c b/target-arm/helper.c
index 711ca12b12..0d2ee41337 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -623,6 +623,15 @@ static void pmccntr_write(CPUARMState *env, const ARMCPRegInfo *ri,
     }
     env->cp15.c15_ccnt = total_ticks - value;
 }
+
+static void pmccntr_write32(CPUARMState *env, const ARMCPRegInfo *ri,
+                            uint64_t value)
+{
+    uint64_t cur_val = pmccntr_read(env, NULL);
+
+    pmccntr_write(env, ri, deposit64(cur_val, 0, 32, value));
+}
+
 #endif
 
 static void pmcntenset_write(CPUARMState *env, const ARMCPRegInfo *ri,
@@ -754,7 +763,7 @@ static const ARMCPRegInfo v7_cp_reginfo[] = {
 #ifndef CONFIG_USER_ONLY
     { .name = "PMCCNTR", .cp = 15, .crn = 9, .crm = 13, .opc1 = 0, .opc2 = 0,
       .access = PL0_RW, .resetvalue = 0, .type = ARM_CP_IO,
-      .readfn = pmccntr_read, .writefn = pmccntr_write,
+      .readfn = pmccntr_read, .writefn = pmccntr_write32,
       .accessfn = pmreg_access },
 #endif
     { .name = "PMXEVTYPER", .cp = 15, .crn = 9, .crm = 13, .opc1 = 0, .opc2 = 1,

From 8521466b391f92681d64eabfeeca17de9b03585d Mon Sep 17 00:00:00 2001
From: Alistair Francis <alistair.francis@xilinx.com>
Date: Fri, 29 Aug 2014 15:00:29 +0100
Subject: [PATCH 12/16] target-arm: Implement PMCCNTR_EL0 and related registers

This patch adds support for the ARMv8 version of the PMCCNTR and
related registers. It also starts to implement the PMCCFILTR_EL0
register.

Signed-off-by: Alistair Francis <alistair.francis@xilinx.com>
Signed-off-by: Peter Crosthwaite <peter.crosthwaite@xilinx.com>
Message-id: b5d1094764a5416363ee95216799b394ecd011e8.1409025949.git.peter.crosthwaite@xilinx.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target-arm/cpu.h    |  5 +++--
 target-arm/helper.c | 45 +++++++++++++++++++++++++++++++++++++++------
 2 files changed, 42 insertions(+), 8 deletions(-)

diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index 986c249038..60dea03092 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -191,8 +191,8 @@ typedef struct CPUARMState {
         uint64_t par_el1;  /* Translation result. */
         uint32_t c9_insn; /* Cache lockdown registers.  */
         uint32_t c9_data;
-        uint32_t c9_pmcr; /* performance monitor control register */
-        uint32_t c9_pmcnten; /* perf monitor counter enables */
+        uint64_t c9_pmcr; /* performance monitor control register */
+        uint64_t c9_pmcnten; /* perf monitor counter enables */
         uint32_t c9_pmovsr; /* perf monitor overflow status */
         uint32_t c9_pmxevtyper; /* perf monitor event type */
         uint32_t c9_pmuserenr; /* perf monitor user enable */
@@ -225,6 +225,7 @@ typedef struct CPUARMState {
          * was reset. Otherwise it stores the counter value
          */
         uint64_t c15_ccnt;
+        uint64_t pmccfiltr_el0; /* Performance Monitor Filter Register */
     } cp15;
 
     struct {
diff --git a/target-arm/helper.c b/target-arm/helper.c
index 0d2ee41337..13507f778b 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -736,16 +736,28 @@ static const ARMCPRegInfo v7_cp_reginfo[] = {
      * or PL0_RO as appropriate and then check PMUSERENR in the helper fn.
      */
     { .name = "PMCNTENSET", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 1,
-      .access = PL0_RW, .resetvalue = 0,
-      .fieldoffset = offsetof(CPUARMState, cp15.c9_pmcnten),
+      .access = PL0_RW, .type = ARM_CP_NO_MIGRATE,
+      .fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pmcnten),
       .writefn = pmcntenset_write,
       .accessfn = pmreg_access,
       .raw_writefn = raw_write },
+    { .name = "PMCNTENSET_EL0", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 12, .opc2 = 1,
+      .access = PL0_RW, .accessfn = pmreg_access,
+      .fieldoffset = offsetof(CPUARMState, cp15.c9_pmcnten), .resetvalue = 0,
+      .writefn = pmcntenset_write, .raw_writefn = raw_write },
     { .name = "PMCNTENCLR", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 2,
-      .access = PL0_RW, .fieldoffset = offsetof(CPUARMState, cp15.c9_pmcnten),
+      .access = PL0_RW,
+      .fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pmcnten),
       .accessfn = pmreg_access,
       .writefn = pmcntenclr_write,
       .type = ARM_CP_NO_MIGRATE },
+    { .name = "PMCNTENCLR_EL0", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 12, .opc2 = 2,
+      .access = PL0_RW, .accessfn = pmreg_access,
+      .type = ARM_CP_NO_MIGRATE,
+      .fieldoffset = offsetof(CPUARMState, cp15.c9_pmcnten),
+      .writefn = pmcntenclr_write },
     { .name = "PMOVSR", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 3,
       .access = PL0_RW, .fieldoffset = offsetof(CPUARMState, cp15.c9_pmovsr),
       .accessfn = pmreg_access,
@@ -765,7 +777,18 @@ static const ARMCPRegInfo v7_cp_reginfo[] = {
       .access = PL0_RW, .resetvalue = 0, .type = ARM_CP_IO,
       .readfn = pmccntr_read, .writefn = pmccntr_write32,
       .accessfn = pmreg_access },
+    { .name = "PMCCNTR_EL0", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 13, .opc2 = 0,
+      .access = PL0_RW, .accessfn = pmreg_access,
+      .type = ARM_CP_IO,
+      .readfn = pmccntr_read, .writefn = pmccntr_write, },
 #endif
+    { .name = "PMCCFILTR_EL0", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 15, .opc2 = 7,
+      .access = PL0_RW, .accessfn = pmreg_access,
+      .type = ARM_CP_IO,
+      .fieldoffset = offsetof(CPUARMState, cp15.pmccfiltr_el0),
+      .resetvalue = 0, },
     { .name = "PMXEVTYPER", .cp = 15, .crn = 9, .crm = 13, .opc1 = 0, .opc2 = 1,
       .access = PL0_RW,
       .fieldoffset = offsetof(CPUARMState, cp15.c9_pmxevtyper),
@@ -2394,13 +2417,23 @@ void register_cp_regs_for_features(ARMCPU *cpu)
 #ifndef CONFIG_USER_ONLY
         ARMCPRegInfo pmcr = {
             .name = "PMCR", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 0,
-            .access = PL0_RW, .resetvalue = cpu->midr & 0xff000000,
-            .type = ARM_CP_IO,
-            .fieldoffset = offsetof(CPUARMState, cp15.c9_pmcr),
+            .access = PL0_RW,
+            .type = ARM_CP_IO | ARM_CP_NO_MIGRATE,
+            .fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pmcr),
             .accessfn = pmreg_access, .writefn = pmcr_write,
             .raw_writefn = raw_write,
         };
+        ARMCPRegInfo pmcr64 = {
+            .name = "PMCR_EL0", .state = ARM_CP_STATE_AA64,
+            .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 12, .opc2 = 0,
+            .access = PL0_RW, .accessfn = pmreg_access,
+            .type = ARM_CP_IO,
+            .fieldoffset = offsetof(CPUARMState, cp15.c9_pmcr),
+            .resetvalue = cpu->midr & 0xff000000,
+            .writefn = pmcr_write, .raw_writefn = raw_write,
+        };
         define_one_arm_cp_reg(cpu, &pmcr);
+        define_one_arm_cp_reg(cpu, &pmcr64);
 #endif
         ARMCPRegInfo clidr = {
             .name = "CLIDR", .state = ARM_CP_STATE_BOTH,

From 87124fdea443322924be00eb79430a6243cf0747 Mon Sep 17 00:00:00 2001
From: Alistair Francis <alistair.francis@xilinx.com>
Date: Fri, 29 Aug 2014 15:00:29 +0100
Subject: [PATCH 13/16] target-arm: Add arm_ccnt_enabled function

Include a helper function to determine if the CCNT counter
is enabled.

Signed-off-by: Alistair Francis <alistair.francis@xilinx.com>
Signed-off-by: Peter Crosthwaite <peter.crosthwaite@xilinx.com>
Message-id: e1a64f17a756e06c8bda8238ad4826d705049f7a.1409025949.git.peter.crosthwaite@xilinx.com
[ PC changes
  * Remove EL based checks
]
Signed-off-by: Peter Crosthwaite <peter.crosthwaite@xilinx.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target-arm/helper.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/target-arm/helper.c b/target-arm/helper.c
index 13507f778b..e6c82ab0b8 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -548,6 +548,18 @@ static CPAccessResult pmreg_access(CPUARMState *env, const ARMCPRegInfo *ri)
 }
 
 #ifndef CONFIG_USER_ONLY
+
+static inline bool arm_ccnt_enabled(CPUARMState *env)
+{
+    /* This does not support checking PMCCFILTR_EL0 register */
+
+    if (!(env->cp15.c9_pmcr & PMCRE)) {
+        return false;
+    }
+
+    return true;
+}
+
 static void pmcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
                        uint64_t value)
 {

From ec7b4ce4c7864337a336721721d48456b4b5b51d Mon Sep 17 00:00:00 2001
From: Alistair Francis <alistair.francis@xilinx.com>
Date: Fri, 29 Aug 2014 15:00:29 +0100
Subject: [PATCH 14/16] target-arm: Implement pmccntr_sync function

This is used to synchronise the PMCCNTR counter and swap its
state between enabled and disabled if required. It must always
be called twice, both before and after any logic that could
change the state of the PMCCNTR counter.

Signed-off-by: Alistair Francis <alistair.francis@xilinx.com>
Signed-off-by: Peter Crosthwaite <peter.crosthwaite@xilinx.com>
Message-id: 62811d4c0f7b1384f7aab62ea2fcfda3dcb0db50.1409025949.git.peter.crosthwaite@xilinx.com
[PMM: fixed minor typos in pmccntr_sync doc comment]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target-arm/cpu.h    | 11 +++++++++++
 target-arm/helper.c | 23 +++++++++++++++++++++++
 2 files changed, 34 insertions(+)

diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index 60dea03092..51bedc8262 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -353,6 +353,17 @@ int cpu_arm_signal_handler(int host_signum, void *pinfo,
 int arm_cpu_handle_mmu_fault(CPUState *cpu, vaddr address, int rw,
                              int mmu_idx);
 
+/**
+ * pmccntr_sync
+ * @env: CPUARMState
+ *
+ * Synchronises the counter in the PMCCNTR. This must always be called twice,
+ * once before any action that might affect the timer and again afterwards.
+ * The function is used to swap the state of the register if required.
+ * This only happens when not in user mode (!CONFIG_USER_ONLY)
+ */
+void pmccntr_sync(CPUARMState *env);
+
 /* SCTLR bit meanings. Several bits have been reused in newer
  * versions of the architecture; in that case we define constants
  * for both old and new bit meanings. Code which tests against those
diff --git a/target-arm/helper.c b/target-arm/helper.c
index e6c82ab0b8..fa79dfa614 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -560,6 +560,23 @@ static inline bool arm_ccnt_enabled(CPUARMState *env)
     return true;
 }
 
+void pmccntr_sync(CPUARMState *env)
+{
+    uint64_t temp_ticks;
+
+    temp_ticks = muldiv64(qemu_clock_get_us(QEMU_CLOCK_VIRTUAL),
+                          get_ticks_per_sec(), 1000000);
+
+    if (env->cp15.c9_pmcr & PMCRD) {
+        /* Increment once every 64 processor clock cycles */
+        temp_ticks /= 64;
+    }
+
+    if (arm_ccnt_enabled(env)) {
+        env->cp15.c15_ccnt = temp_ticks - env->cp15.c15_ccnt;
+    }
+}
+
 static void pmcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
                        uint64_t value)
 {
@@ -644,6 +661,12 @@ static void pmccntr_write32(CPUARMState *env, const ARMCPRegInfo *ri,
     pmccntr_write(env, ri, deposit64(cur_val, 0, 32, value));
 }
 
+#else /* CONFIG_USER_ONLY */
+
+void pmccntr_sync(CPUARMState *env)
+{
+}
+
 #endif
 
 static void pmcntenset_write(CPUARMState *env, const ARMCPRegInfo *ri,

From 942a155b2098d7cd9b6f93e251cd06fe11d96bcf Mon Sep 17 00:00:00 2001
From: Alistair Francis <alistair.francis@xilinx.com>
Date: Fri, 29 Aug 2014 15:00:30 +0100
Subject: [PATCH 15/16] target-arm: Remove old code and replace with new
 functions

Remove the old PMCCNTR code and replace it with calls to the new
pmccntr_sync() and arm_ccnt_enabled() functions.

Signed-off-by: Alistair Francis <alistair.francis@xilinx.com>
Signed-off-by: Peter Crosthwaite <peter.crosthwaite@xilinx.com>
Message-id: 693a6e437d915c2195fd3dc7303f384ca538b7bf.1409025949.git.peter.crosthwaite@xilinx.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target-arm/helper.c | 27 ++++-----------------------
 1 file changed, 4 insertions(+), 23 deletions(-)

diff --git a/target-arm/helper.c b/target-arm/helper.c
index fa79dfa614..d213ed14ad 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -580,20 +580,7 @@ void pmccntr_sync(CPUARMState *env)
 static void pmcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
                        uint64_t value)
 {
-    uint64_t temp_ticks;
-
-    temp_ticks = muldiv64(qemu_clock_get_us(QEMU_CLOCK_VIRTUAL),
-                          get_ticks_per_sec(), 1000000);
-
-    if (env->cp15.c9_pmcr & PMCRE) {
-        /* If the counter is enabled */
-        if (env->cp15.c9_pmcr & PMCRD) {
-            /* Increment once every 64 processor clock cycles */
-            env->cp15.c15_ccnt = (temp_ticks/64) - env->cp15.c15_ccnt;
-        } else {
-            env->cp15.c15_ccnt = temp_ticks - env->cp15.c15_ccnt;
-        }
-    }
+    pmccntr_sync(env);
 
     if (value & PMCRC) {
         /* The counter has been reset */
@@ -604,20 +591,14 @@ static void pmcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
     env->cp15.c9_pmcr &= ~0x39;
     env->cp15.c9_pmcr |= (value & 0x39);
 
-    if (env->cp15.c9_pmcr & PMCRE) {
-        if (env->cp15.c9_pmcr & PMCRD) {
-            /* Increment once every 64 processor clock cycles */
-            temp_ticks /= 64;
-        }
-        env->cp15.c15_ccnt = temp_ticks - env->cp15.c15_ccnt;
-    }
+    pmccntr_sync(env);
 }
 
 static uint64_t pmccntr_read(CPUARMState *env, const ARMCPRegInfo *ri)
 {
     uint64_t total_ticks;
 
-    if (!(env->cp15.c9_pmcr & PMCRE)) {
+    if (!arm_ccnt_enabled(env)) {
         /* Counter is disabled, do not change value */
         return env->cp15.c15_ccnt;
     }
@@ -637,7 +618,7 @@ static void pmccntr_write(CPUARMState *env, const ARMCPRegInfo *ri,
 {
     uint64_t total_ticks;
 
-    if (!(env->cp15.c9_pmcr & PMCRE)) {
+    if (!arm_ccnt_enabled(env)) {
         /* Counter is disabled, set the absolute value */
         env->cp15.c15_ccnt = value;
         return;

From 0614601cecc8e5d9c6c5fa606b39fe388a18583a Mon Sep 17 00:00:00 2001
From: Alistair Francis <alistair.francis@xilinx.com>
Date: Fri, 29 Aug 2014 15:00:30 +0100
Subject: [PATCH 16/16] target-arm: Implement pmccfiltr_write function

This is the function that is called when writing to the
PMCCFILTR_EL0 register

Signed-off-by: Alistair Francis <alistair.francis@xilinx.com>
Signed-off-by: Peter Crosthwaite <peter.crosthwaite@xilinx.com>
Message-id: 73da3da6404855b17d5ae82975a32ff3a4dcae3d.1409025949.git.peter.crosthwaite@xilinx.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target-arm/helper.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/target-arm/helper.c b/target-arm/helper.c
index d213ed14ad..2b95f33872 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -650,6 +650,14 @@ void pmccntr_sync(CPUARMState *env)
 
 #endif
 
+static void pmccfiltr_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                            uint64_t value)
+{
+    pmccntr_sync(env);
+    env->cp15.pmccfiltr_el0 = value & 0x7E000000;
+    pmccntr_sync(env);
+}
+
 static void pmcntenset_write(CPUARMState *env, const ARMCPRegInfo *ri,
                             uint64_t value)
 {
@@ -801,6 +809,7 @@ static const ARMCPRegInfo v7_cp_reginfo[] = {
 #endif
     { .name = "PMCCFILTR_EL0", .state = ARM_CP_STATE_AA64,
       .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 15, .opc2 = 7,
+      .writefn = pmccfiltr_write,
       .access = PL0_RW, .accessfn = pmreg_access,
       .type = ARM_CP_IO,
       .fieldoffset = offsetof(CPUARMState, cp15.pmccfiltr_el0),