Skip to content

make deterministic_hash host-architecture-independent #38357

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Dec 16, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions src/librustc/hir/map/definitions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@

use hir::def_id::{CrateNum, DefId, DefIndex, LOCAL_CRATE};
use rustc_data_structures::fx::FxHashMap;
use rustc_data_structures::stable_hasher::StableHasher;
use std::fmt::Write;
use std::hash::{Hash, Hasher};
use std::collections::hash_map::DefaultHasher;
use syntax::ast;
use syntax::symbol::{Symbol, InternedString};
use ty::TyCtxt;
Expand Down Expand Up @@ -131,7 +131,8 @@ impl DefPath {
}

pub fn deterministic_hash(&self, tcx: TyCtxt) -> u64 {
let mut state = DefaultHasher::new();
debug!("deterministic_hash({:?})", self);
let mut state = StableHasher::new();
self.deterministic_hash_to(tcx, &mut state);
state.finish()
}
Expand Down Expand Up @@ -377,4 +378,3 @@ impl DefPathData {
self.as_interned_str().to_string()
}
}

104 changes: 16 additions & 88 deletions src/librustc/ty/util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,11 @@ use util::nodemap::FxHashMap;
use middle::lang_items;

use rustc_const_math::{ConstInt, ConstIsize, ConstUsize};
use rustc_data_structures::stable_hasher::{StableHasher, StableHasherResult};

use std::cell::RefCell;
use std::cmp;
use std::hash::{Hash, Hasher};
use std::collections::hash_map::DefaultHasher;
use std::hash::Hash;
use std::intrinsics;
use syntax::ast::{self, Name};
use syntax::attr::{self, SignedInt, UnsignedInt};
Expand Down Expand Up @@ -349,7 +349,7 @@ impl<'a, 'gcx, 'tcx> TyCtxt<'a, 'gcx, 'tcx> {
/// Creates a hash of the type `Ty` which will be the same no matter what crate
/// context it's calculated within. This is used by the `type_id` intrinsic.
pub fn type_id_hash(self, ty: Ty<'tcx>) -> u64 {
let mut hasher = TypeIdHasher::new(self, DefaultHasher::default());
let mut hasher = TypeIdHasher::new(self);
hasher.visit_ty(ty);
hasher.finish()
}
Expand Down Expand Up @@ -395,96 +395,26 @@ impl<'a, 'gcx, 'tcx> TyCtxt<'a, 'gcx, 'tcx> {
}
}

/// When hashing a type this ends up affecting properties like symbol names. We
/// want these symbol names to be calculated independent of other factors like
/// what architecture you're compiling *from*.
///
/// The hashing just uses the standard `Hash` trait, but the implementations of
/// `Hash` for the `usize` and `isize` types are *not* architecture independent
/// (e.g. they has 4 or 8 bytes). As a result we want to avoid `usize` and
/// `isize` completely when hashing. To ensure that these don't leak in we use a
/// custom hasher implementation here which inflates the size of these to a `u64`
/// and `i64`.
///
/// The same goes for endianess: We always convert multi-byte integers to little
/// endian before hashing.
#[derive(Debug)]
pub struct ArchIndependentHasher<H> {
inner: H,
}

impl<H> ArchIndependentHasher<H> {
pub fn new(inner: H) -> ArchIndependentHasher<H> {
ArchIndependentHasher { inner: inner }
}

pub fn into_inner(self) -> H {
self.inner
}
pub struct TypeIdHasher<'a, 'gcx: 'a+'tcx, 'tcx: 'a, W> {
tcx: TyCtxt<'a, 'gcx, 'tcx>,
state: StableHasher<W>,
}

impl<H: Hasher> Hasher for ArchIndependentHasher<H> {
fn write(&mut self, bytes: &[u8]) {
self.inner.write(bytes)
}

fn finish(&self) -> u64 {
self.inner.finish()
}

fn write_u8(&mut self, i: u8) {
self.inner.write_u8(i)
}
fn write_u16(&mut self, i: u16) {
self.inner.write_u16(i.to_le())
}
fn write_u32(&mut self, i: u32) {
self.inner.write_u32(i.to_le())
}
fn write_u64(&mut self, i: u64) {
self.inner.write_u64(i.to_le())
}
fn write_usize(&mut self, i: usize) {
self.inner.write_u64((i as u64).to_le())
impl<'a, 'gcx, 'tcx, W> TypeIdHasher<'a, 'gcx, 'tcx, W>
where W: StableHasherResult
{
pub fn new(tcx: TyCtxt<'a, 'gcx, 'tcx>) -> Self {
TypeIdHasher { tcx: tcx, state: StableHasher::new() }
}
fn write_i8(&mut self, i: i8) {
self.inner.write_i8(i)
}
fn write_i16(&mut self, i: i16) {
self.inner.write_i16(i.to_le())
}
fn write_i32(&mut self, i: i32) {
self.inner.write_i32(i.to_le())
}
fn write_i64(&mut self, i: i64) {
self.inner.write_i64(i.to_le())
}
fn write_isize(&mut self, i: isize) {
self.inner.write_i64((i as i64).to_le())
}
}

pub struct TypeIdHasher<'a, 'gcx: 'a+'tcx, 'tcx: 'a, H> {
tcx: TyCtxt<'a, 'gcx, 'tcx>,
state: ArchIndependentHasher<H>,
}

impl<'a, 'gcx, 'tcx, H: Hasher> TypeIdHasher<'a, 'gcx, 'tcx, H> {
pub fn new(tcx: TyCtxt<'a, 'gcx, 'tcx>, state: H) -> Self {
TypeIdHasher {
tcx: tcx,
state: ArchIndependentHasher::new(state),
}
pub fn finish(self) -> W {
self.state.finish()
}

pub fn hash<T: Hash>(&mut self, x: T) {
x.hash(&mut self.state);
}

pub fn finish(self) -> u64 {
self.state.finish()
}

fn hash_discriminant_u8<T>(&mut self, x: &T) {
let v = unsafe {
intrinsics::discriminant_value(x)
Expand All @@ -504,13 +434,11 @@ impl<'a, 'gcx, 'tcx, H: Hasher> TypeIdHasher<'a, 'gcx, 'tcx, H> {
pub fn def_path(&mut self, def_path: &ast_map::DefPath) {
def_path.deterministic_hash_to(self.tcx, &mut self.state);
}

pub fn into_inner(self) -> H {
self.state.inner
}
}

impl<'a, 'gcx, 'tcx, H: Hasher> TypeVisitor<'tcx> for TypeIdHasher<'a, 'gcx, 'tcx, H> {
impl<'a, 'gcx, 'tcx, W> TypeVisitor<'tcx> for TypeIdHasher<'a, 'gcx, 'tcx, W>
where W: StableHasherResult
{
fn visit_ty(&mut self, ty: Ty<'tcx>) -> bool {
// Distinguish between the Ty variants uniformly.
self.hash_discriminant_u8(&ty.sty);
Expand Down
3 changes: 3 additions & 0 deletions src/librustc_data_structures/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ extern crate serialize as rustc_serialize; // used by deriving
#[cfg(unix)]
extern crate libc;

pub use rustc_serialize::hex::ToHex;

pub mod array_vec;
pub mod accumulate_vec;
pub mod small_vec;
Expand All @@ -59,6 +61,7 @@ pub mod indexed_vec;
pub mod obligation_forest;
pub mod snapshot_map;
pub mod snapshot_vec;
pub mod stable_hasher;
pub mod transitive_relation;
pub mod unify;
pub mod fnv;
Expand Down
176 changes: 176 additions & 0 deletions src/librustc_data_structures/stable_hasher.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
// Copyright 2016 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

use std::hash::Hasher;
use std::marker::PhantomData;
use std::mem;
use blake2b::Blake2bHasher;
use rustc_serialize::leb128;

fn write_unsigned_leb128_to_buf(buf: &mut [u8; 16], value: u64) -> usize {
leb128::write_unsigned_leb128_to(value, |i, v| buf[i] = v)
}

fn write_signed_leb128_to_buf(buf: &mut [u8; 16], value: i64) -> usize {
leb128::write_signed_leb128_to(value, |i, v| buf[i] = v)
}

/// When hashing something that ends up affecting properties like symbol names. We
/// want these symbol names to be calculated independent of other factors like
/// what architecture you're compiling *from*.
///
/// The hashing just uses the standard `Hash` trait, but the implementations of
/// `Hash` for the `usize` and `isize` types are *not* architecture independent
/// (e.g. they has 4 or 8 bytes). As a result we want to avoid `usize` and
/// `isize` completely when hashing.
///
/// To do that, we encode all integers to be hashed with some
/// arch-independent encoding.
///
/// At the moment, we pass i8/u8 straight through and encode
/// all other integers using leb128.
///
/// This hasher currently always uses the stable Blake2b algorithm
/// and allows for variable output lengths through its type
/// parameter.
#[derive(Debug)]
pub struct StableHasher<W> {
state: Blake2bHasher,
bytes_hashed: u64,
width: PhantomData<W>,
}

pub trait StableHasherResult: Sized {
fn finish(hasher: StableHasher<Self>) -> Self;
}

impl<W: StableHasherResult> StableHasher<W> {
pub fn new() -> Self {
StableHasher {
state: Blake2bHasher::new(mem::size_of::<W>(), &[]),
bytes_hashed: 0,
width: PhantomData,
}
}

pub fn finish(self) -> W {
W::finish(self)
}
}

impl StableHasherResult for [u8; 20] {
fn finish(mut hasher: StableHasher<Self>) -> Self {
let mut result: [u8; 20] = [0; 20];
result.copy_from_slice(hasher.state.finalize());
result
}
}

impl StableHasherResult for u64 {
fn finish(mut hasher: StableHasher<Self>) -> Self {
hasher.state.finalize();
hasher.state.finish()
}
}

impl<W> StableHasher<W> {
#[inline]
pub fn finalize(&mut self) -> &[u8] {
self.state.finalize()
}

#[inline]
pub fn bytes_hashed(&self) -> u64 {
self.bytes_hashed
}

#[inline]
fn write_uleb128(&mut self, value: u64) {
let mut buf = [0; 16];
let len = write_unsigned_leb128_to_buf(&mut buf, value);
self.state.write(&buf[..len]);
self.bytes_hashed += len as u64;
}

#[inline]
fn write_ileb128(&mut self, value: i64) {
let mut buf = [0; 16];
let len = write_signed_leb128_to_buf(&mut buf, value);
self.state.write(&buf[..len]);
self.bytes_hashed += len as u64;
}
}

// For the non-u8 integer cases we leb128 encode them first. Because small
// integers dominate, this significantly and cheaply reduces the number of
// bytes hashed, which is good because blake2b is expensive.
impl<W> Hasher for StableHasher<W> {
fn finish(&self) -> u64 {
panic!("use StableHasher::finish instead");
}

#[inline]
fn write(&mut self, bytes: &[u8]) {
self.state.write(bytes);
self.bytes_hashed += bytes.len() as u64;
}

#[inline]
fn write_u8(&mut self, i: u8) {
self.state.write_u8(i);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

bytes_hashed is not incremented here.

self.bytes_hashed += 1;
}

#[inline]
fn write_u16(&mut self, i: u16) {
self.write_uleb128(i as u64);
}

#[inline]
fn write_u32(&mut self, i: u32) {
self.write_uleb128(i as u64);
}

#[inline]
fn write_u64(&mut self, i: u64) {
self.write_uleb128(i);
}

#[inline]
fn write_usize(&mut self, i: usize) {
self.write_uleb128(i as u64);
}

#[inline]
fn write_i8(&mut self, i: i8) {
self.state.write_i8(i);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same as above.

self.bytes_hashed += 1;
}

#[inline]
fn write_i16(&mut self, i: i16) {
self.write_ileb128(i as i64);
}

#[inline]
fn write_i32(&mut self, i: i32) {
self.write_ileb128(i as i64);
}

#[inline]
fn write_i64(&mut self, i: i64) {
self.write_ileb128(i);
}

#[inline]
fn write_isize(&mut self, i: isize) {
self.write_ileb128(i as i64);
}
}
Loading