/* Manage TLS descriptors.  x86_64 version.
   Copyright (C) 2005 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.  */

#include <link.h>
#include <ldsodefs.h>
#include <elf/dynamic-link.h>
#include <tls.h>
#include <dl-tlsdesc.h>

#ifdef USE_TLS
# ifdef SHARED

extern void weak_function free (void *ptr);

/* The hashcode handling code below is heavily inspired by libiberty's
   hashtab code, but with most adaptation points and support for
   deleting elements removed.

   Copyright (C) 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
   Contributed by Vladimir Makarov (vmakarov@cygnus.com).  */

/* Return the smallest entry of the built-in prime table that is
   greater than or equal to N.  The table tops out at 4294967291; if N
   is larger than that, the largest entry is returned instead of
   reading past the end of the table.  */
inline static unsigned long
higher_prime_number (unsigned long n)
{
  /* These are primes that are near, but slightly smaller than, a
     power of two.  */
  static const unsigned long primes[] = {
    (unsigned long) 7,
    (unsigned long) 13,
    (unsigned long) 31,
    (unsigned long) 61,
    (unsigned long) 127,
    (unsigned long) 251,
    (unsigned long) 509,
    (unsigned long) 1021,
    (unsigned long) 2039,
    (unsigned long) 4093,
    (unsigned long) 8191,
    (unsigned long) 16381,
    (unsigned long) 32749,
    (unsigned long) 65521,
    (unsigned long) 131071,
    (unsigned long) 262139,
    (unsigned long) 524287,
    (unsigned long) 1048573,
    (unsigned long) 2097143,
    (unsigned long) 4194301,
    (unsigned long) 8388593,
    (unsigned long) 16777213,
    (unsigned long) 33554393,
    (unsigned long) 67108859,
    (unsigned long) 134217689,
    (unsigned long) 268435399,
    (unsigned long) 536870909,
    (unsigned long) 1073741789,
    (unsigned long) 2147483647,
					/* 4294967291L */
    ((unsigned long) 2147483647) + ((unsigned long) 2147483644),
  };

  /* One past the last table entry; never dereferenced.  */
  const unsigned long *const end
    = &primes[sizeof (primes) / sizeof (primes[0])];
  const unsigned long *low = &primes[0];
  const unsigned long *high = end;

  /* Binary search for the first entry that is not smaller than N.  */
  while (low != high)
    {
      const unsigned long *mid = low + (high - low) / 2;
      if (n > *mid)
	low = mid + 1;
      else
	high = mid;
    }

  /* N is bigger than every prime we know about: the search stopped
     one past the end of the table.  Saturate at the largest prime
     rather than reading out of bounds.  */
  if (low == end)
    low = end - 1;

  return *low;
}

/* Open-addressing hash table of pointers.  An empty slot holds a
   null pointer; the code in this file never removes individual
   entries.  */
struct hashtab
{
  /* Table itself: an array of SIZE slots, each either null (empty)
     or a pointer to a stored element.  */
  void **entries;

  /* Current size (in entries) of the hash table */
  size_t size;

  /* Current number of elements.  htab_find_slot bumps this as soon as
     it hands out an empty slot for insertion.  */
  size_t n_elements;
};

/* Allocate and return an empty hash table with 3 slots, all null.
   Returns NULL if either the table header or the slot array cannot
   be allocated; nothing is leaked in that case.  */
inline static struct hashtab *
htab_create (void)
{
  struct hashtab *ht = malloc (sizeof (struct hashtab));

  if (! ht)
    return NULL;

  ht->size = 3;
  ht->n_elements = 0;
  ht->entries = malloc (sizeof (void *) * ht->size);
  if (! ht->entries)
    {
      /* The header was obtained from malloc a few lines above, so
	 give it back instead of leaking it.  */
      free (ht);
      return NULL;
    }

  /* Every slot starts out empty.  */
  memset (ht->entries, 0, sizeof (void *) * ht->size);

  return ht;
}

/* Free every element stored in HTAB, then the slot array and the
   table header itself.  HTAB must not be used afterwards.

   This is only called from _dl_unmap, so it's safe to call
   free().  See the discussion in htab_expand.  */
inline static void
htab_delete (struct hashtab *htab)
{
  /* Use an unsigned index of the same width as htab->size: a plain
     int would be truncated for tables larger than INT_MAX slots.  */
  size_t i;

  /* Walk the slots from last to first.  */
  for (i = htab->size; i-- > 0; )
    if (htab->entries[i])
      free (htab->entries[i]);

  free (htab->entries);
  free (htab);
}

/* Locate an unused slot in HTAB for an element whose hash value is
   HASH, using the same double-hashing probe sequence as
   htab_find_slot.  Unlike htab_find_slot this never compares
   elements and never touches the element count; it is meant for
   re-inserting elements that are already known to be distinct.

   The table must contain at least one empty slot and no deleted
   entries, otherwise the probe loop does not terminate.  */

inline static void **
find_empty_slot_for_expand (struct hashtab *htab, int hash)
{
  size_t nslots = htab->size;
  unsigned int pos = hash % nslots;
  int step;

  /* Primary position is free: done.  */
  if (! htab->entries[pos])
    return &htab->entries[pos];

  /* Otherwise step through the table with a secondary hash until a
     free slot shows up.  */
  step = 1 + hash % (nslots - 2);
  do
    {
      pos += step;
      if (pos >= nslots)
	pos -= nslots;
    }
  while (htab->entries[pos]);

  return &htab->entries[pos];
}

/* Allocate a fresh slot array for HTAB and re-insert every existing
   element into it, using HASH_FN to recompute each element's hash.
   When the table is more than half full the new array has
   higher_prime_number (2 * n_elements) slots, so occupancy afterwards
   is about 50% or less; otherwise the size is kept.  The table must
   already exist, and the location of the slot array changes.

   Returns non-zero on success.  Returns zero, leaving HTAB untouched,
   if the new slot array cannot be allocated.  */

inline static int
htab_expand (struct hashtab *htab, int (*hash_fn)(void *))
{
  void **oentries;
  void **olimit;
  void **p;
  void **nentries;
  size_t nsize;

  oentries = htab->entries;
  olimit = oentries + htab->size;

  /* Grow only when the table is more than half full; otherwise just
     rebuild it at its current size.  */
  if (htab->n_elements * 2 > htab->size)
    nsize = higher_prime_number (htab->n_elements * 2);
  else
    nsize = htab->size;

  nentries = malloc (sizeof (void *) * nsize);
  /* Check for failure before touching the new array.  */
  if (nentries == NULL)
    return 0;
  memset (nentries, 0, sizeof (void *) * nsize);
  htab->entries = nentries;
  htab->size = nsize;

  /* Move every live element over to the new array.  The old array
     always has at least one slot, hence the do/while.  */
  p = oentries;
  do
    {
      if (*p)
	*find_empty_slot_for_expand (htab, hash_fn (*p))
	  = *p;

      p++;
    }
  while (p < olimit);

#if 0 /* We can't tell whether this was allocated by the malloc()
	 built into ld.so or the one in the main executable or libc,
	 and calling free() for something that wasn't malloc()ed could
	 do Very Bad Things (TM).  Take the conservative approach
	 here, potentially wasting as much memory as actually used by
	 the hash table, even if multiple growths occur.  That's not
	 so bad as to require some overengineered solution that would
	 enable us to keep track of how it was allocated. */
  free (oentries);
#endif
  return 1;
}

/* Look up PTR in HTAB and return the address of the slot holding an
   element for which EQ_FN (element, PTR) is non-zero.  HASH_FN
   supplies the hash of an element.

   Before probing, the table is expanded if it is at least three
   quarters full; this happens for lookups as well as insertions.  If
   that expansion fails, NULL is returned.

   If no matching element exists:
    - with INSERT == 0, NULL is returned;
    - with INSERT != 0, the element count is incremented and the
      address of an empty slot is returned; the caller is expected to
      store the new element there.  */

inline static void **
htab_find_slot (struct hashtab *htab, void *ptr, int insert,
		int (*hash_fn)(void *), int (*eq_fn)(void *, void *))
{
  unsigned int pos;
  int key, step;
  size_t nslots;
  void **slot;

  if (htab->size * 3 <= htab->n_elements * 4)
    {
      if (htab_expand (htab, hash_fn) == 0)
	return NULL;
    }

  key = hash_fn (ptr);
  nslots = htab->size;
  pos = key % nslots;
  slot = &htab->entries[pos];

  if (*slot)
    {
      /* Primary position is occupied: either it is the element we
	 are after, or we have to follow the secondary probe
	 sequence.  */
      if (eq_fn (*slot, ptr))
	return slot;

      step = 1 + key % (nslots - 2);
      for (;;)
	{
	  pos += step;
	  if (pos >= nslots)
	    pos -= nslots;

	  slot = &htab->entries[pos];
	  if (!*slot)
	    break;
	  if (eq_fn (*slot, ptr))
	    return slot;
	}
    }

  /* SLOT now designates an empty slot.  */
  if (!insert)
    return NULL;

  htab->n_elements++;
  return slot;
}

/* Hash callback for the per-module descriptor table: P points to a
   struct tlsdesc_dynamic_arg, and its hash is simply the ti_offset
   field (converted to int).  The module id is not mixed in because
   every element of one table belongs to the same module.  */
inline static int
hash_tlsdesc(void *p)
{
  return ((struct tlsdesc_dynamic_arg *) p)->tlsinfo.ti_offset;
}

/* Equality callback for the per-module descriptor table: P and Q
   point to struct tlsdesc_dynamic_arg objects.  Returns non-zero
   when both carry the same ti_offset.  */
inline static int
eq_tlsdesc(void *p, void *q)
{
  struct tlsdesc_dynamic_arg *lhs = p;
  struct tlsdesc_dynamic_arg *rhs = q;

  return lhs->tlsinfo.ti_offset == rhs->tlsinfo.ti_offset;
}

/* Return the TLS generation number to associate with MAP: the one
   recorded in MAP's dtv slotinfo entry if that is non-zero, otherwise
   one more than the current global generation.  */
inline static int
map_generation (struct link_map *map)
{
  size_t idx = map->l_tls_modid;
  struct dtv_slotinfo_list *listp = GL(dl_tls_dtv_slotinfo_list);

  /* Walk the chain of slotinfo arrays; IDX is made relative to the
     current list element as we go.  */
  for (;;)
    {
      if (idx < listp->len)
	{
	  /* This is the array that covers the module.  We should
	     never get here for a module in static TLS, so a zero
	     generation count means that none has been determined for
	     this module yet.  */
	  if (listp->slotinfo[idx].gen)
	    return listp->slotinfo[idx].gen;
	  break;
	}

      idx -= listp->len;
      listp = listp->next;
      if (listp == NULL)
	break;
    }

  /* The module has no entry in the dtv slotinfo data structures yet.
     It will get one when relocation processing is done, and at that
     point its generation number will be one past the current
     generation, so that is what we return.  */
  return GL(dl_tls_generation) + 1;
}

void *
_dl_make_tlsdesc_dynamic (struct link_map *map, size_t ti_offset)
{
  struct hashtab *ht;
  void **entry;
  struct tlsdesc_dynamic_arg *td, test;

  /* FIXME: We could use a per-map lock here, but is it worth it?  */
  __rtld_lock_lock_recursive (GL(dl_load_lock));

  ht = map->l_mach.tlsdesc_table;
  if (! ht)
    {
      ht = htab_create ();
      if (! ht)
	{
	  __rtld_lock_unlock_recursive (GL(dl_load_lock));
	  return 0;
	}
      map->l_mach.tlsdesc_table = ht;
    }

  test.tlsinfo.ti_module = map->l_tls_modid;
  test.tlsinfo.ti_offset = ti_offset;
  entry = htab_find_slot (ht, &test, 1, hash_tlsdesc, eq_tlsdesc);
  if (*entry)
    {
      td = *entry;
      __rtld_lock_unlock_recursive (GL(dl_load_lock));
      return td;
    }

  *entry = td = malloc (sizeof (struct tlsdesc_dynamic_arg));
  /* This may be higher than the map's generation, but it doesn't
     matter much.  Worst case, we'll have one extra DTV update per
     thread.  */
  td->gen_count = map_generation (map);
  td->tlsinfo = test.tlsinfo;

  __rtld_lock_unlock_recursive (GL(dl_load_lock));
  return td;
}

# endif /* SHARED */

/* The idea of the following two functions is to stop multiple threads
   from attempting to resolve the same TLS descriptor without busy
   waiting.  Ideally, we should be able to release the lock right
   after changing td->entry, and then using say a condition variable
   or a futex wake to wake up any waiting threads, but let's try to
   avoid introducing such dependencies.  */

/* Decide whether the resolver identified by CALLER still has to
   process TD.

   Returns 1 if td->entry no longer equals CALLER, either on the
   first unlocked check or on the re-check made with dl_load_lock
   held; dl_load_lock is not held on return in that case.

   Returns 0 otherwise, after switching td->entry to
   _dl_tlsdesc_resolve_hold.  dl_load_lock is then still held and must
   be released by the caller.  */
inline static int
_dl_tlsdesc_resolve_early_return_p (struct tlsdesc volatile *td, void *caller)
{
  if (caller == td->entry)
    {
      __rtld_lock_lock_recursive (GL(dl_load_lock));

      /* Look again now that we own the lock: the entry may have been
	 changed while we were waiting for it.  */
      if (caller == td->entry)
	{
	  td->entry = _dl_tlsdesc_resolve_hold;
	  return 0;
	}

      __rtld_lock_unlock_recursive (GL(dl_load_lock));
    }

  return 1;
}

/* Release dl_load_lock.  This is the counterpart of a zero return
   from _dl_tlsdesc_resolve_early_return_p, which leaves the lock
   held; threads blocked on the lock in _dl_tlsdesc_resolve_hold_fixup
   can proceed once it is released.  */
inline static void
_dl_tlsdesc_wake_up_held_fixups (void)
{
  __rtld_lock_unlock_recursive (GL(dl_load_lock));
}

/* _dl_tlsdesc_resolve_hold_fixup below takes an entry_check_offset
   argument.  It's computed by the caller as an offset between its
   entry point and the call site, such that by subtracting this offset
   from the built-in return address that is implicitly passed to the
   function, we can easily obtain the caller's entry point to compare
   with the entry point given in the TLS descriptor.  If it's changed,
   we want to return immediately.  _dl_tlsdesc_resolve_rela_fixup
   instead derives the expected entry point from the link map's
   DT_TLSDESC_PLT entry.  */

/* These macros are copied from elf/dl-reloc.c */

/* If SYM_MAP's l_tls_offset is NO_TLS_OFFSET or
   FORCED_DYNAMIC_TLS_OFFSET, call _dl_allocate_static_tls on it.
   The MAP argument is not used by the expansion.  */
#define CHECK_STATIC_TLS(map, sym_map)					\
    do {								\
      if (__builtin_expect ((sym_map)->l_tls_offset == NO_TLS_OFFSET	\
			    || ((sym_map)->l_tls_offset			\
				== FORCED_DYNAMIC_TLS_OFFSET), 0))	\
	_dl_allocate_static_tls (sym_map);				\
    } while (0)

/* Expression that is non-zero when SYM_MAP's l_tls_offset is not
   FORCED_DYNAMIC_TLS_OFFSET and, in addition, either is already
   different from NO_TLS_OFFSET or _dl_try_allocate_static_tls
   (SYM_MAP) returns 0.  The MAP argument is not used by the
   expansion.  */
#define TRY_STATIC_TLS(map, sym_map)					\
    (__builtin_expect ((sym_map)->l_tls_offset				\
		       != FORCED_DYNAMIC_TLS_OFFSET, 1)			\
     && (__builtin_expect ((sym_map)->l_tls_offset != NO_TLS_OFFSET, 1)	\
	 || _dl_try_allocate_static_tls (sym_map) == 0))

/* Declared here for TRY_STATIC_TLS; a return value of 0 is what the
   macro treats as success.  */
int internal_function _dl_try_allocate_static_tls (struct link_map *map);

/* This function is used to lazily resolve TLS_DESC RELA relocations.
   The argument location is used to hold a pointer to the relocation.

   TD is the descriptor to resolve; on entry td->arg is read as a
   pointer to the ElfW(Rela) record.  L is the link map whose symbol
   table, string table and version information are used for the
   lookup.

   Unless another thread already changed td->entry, the function
   rewrites td->arg and then td->entry to one of three forms:
    - _dl_tlsdesc_undefweak, when the symbol lookup yields no symbol;
    - _dl_tlsdesc_dynamic, when (SHARED only) TRY_STATIC_TLS fails;
    - _dl_tlsdesc_return otherwise.  */

void
attribute_hidden
_dl_tlsdesc_resolve_rela_fixup (struct tlsdesc volatile *td,
				struct link_map *l)
{
  const ElfW(Rela) *reloc = td->arg;

  /* The entry point this resolver expects to find in TD is the
     DT_TLSDESC_PLT value plus the load address.  A non-zero result
     means td->entry is something else, so there is nothing to do
     here.  A zero result leaves dl_load_lock held; it is released by
     _dl_tlsdesc_wake_up_held_fixups at the end.  */
  if (_dl_tlsdesc_resolve_early_return_p
      (td, (void*)(D_PTR (l, l_info[ADDRIDX (DT_TLSDESC_PLT)]) + l->l_addr)))
    return;

  /* The code below was borrowed from _dl_fixup().  */
  const ElfW(Sym) *const symtab
    = (const void *) D_PTR (l, l_info[DT_SYMTAB]);
  const char *strtab = (const void *) D_PTR (l, l_info[DT_STRTAB]);
  const ElfW(Sym) *sym = &symtab[ELFW(R_SYM) (reloc->r_info)];
  lookup_t result;

   /* Look up the target symbol.  If the normal lookup rules are not
      used don't look in the global scope.  */
  if (ELFW(ST_BIND) (sym->st_info) != STB_LOCAL
      && __builtin_expect (ELFW(ST_VISIBILITY) (sym->st_other), 0) == 0)
    {
      const struct r_found_version *version = NULL;

      /* Pick up the version requirement for this symbol, if the
	 object has version information and the entry's hash is
	 non-zero.  */
      if (l->l_info[VERSYMIDX (DT_VERSYM)] != NULL)
	{
	  const ElfW(Half) *vernum =
	    (const void *) D_PTR (l, l_info[VERSYMIDX (DT_VERSYM)]);
	  ElfW(Half) ndx = vernum[ELFW(R_SYM) (reloc->r_info)] & 0x7fff;
	  version = &l->l_versions[ndx];
	  if (version->hash == 0)
	    version = NULL;
	}

      /* SYM is passed by address and tested for null below, so the
	 lookup may replace it.  */
      result = _dl_lookup_symbol_x (strtab + sym->st_name, l, &sym,
				    l->l_scope, version, ELF_RTYPE_CLASS_PLT,
				    DL_LOOKUP_ADD_DEPENDENCY, NULL);
    }
  else
    {
      /* We already found the symbol.  The module (and therefore its load
	 address) is also known.  */
      result = l;
    }

  /* In every branch below td->arg is stored before td->entry.  */
  if (! sym)
    {
      /* No symbol: hand the addend to _dl_tlsdesc_undefweak.  */
      td->arg = (void*)reloc->r_addend;
      td->entry = _dl_tlsdesc_undefweak;
    }
  else
    {
#  ifndef SHARED
      CHECK_STATIC_TLS (l, result);
#  else
      if (!TRY_STATIC_TLS (l, result))
	{
	  /* NOTE(review): _dl_make_tlsdesc_dynamic can return a null
	     pointer on allocation failure; that value is stored in
	     td->arg without being checked here.  */
	  td->arg = _dl_make_tlsdesc_dynamic (result, sym->st_value
					      + reloc->r_addend);
	  td->entry = _dl_tlsdesc_dynamic;
	}
      else
#  endif
	{
	  /* The argument is the symbol value plus addend, minus the
	     module's l_tls_offset.  */
	  td->arg = (void*)(sym->st_value - result->l_tls_offset
			    + reloc->r_addend);
	  td->entry = _dl_tlsdesc_return;
	}
    }

  /* Drops dl_load_lock taken by the early-return check above.  */
  _dl_tlsdesc_wake_up_held_fixups ();
}

/* Called while TD is marked as being resolved by another thread.
   ENTRY_CHECK_OFFSET is subtracted from this function's return
   address to obtain the entry point the caller expects in td->entry.
   If td->entry already differs, return at once; otherwise block until
   dl_load_lock can be acquired, then release it again and return.  */
void
attribute_hidden
_dl_tlsdesc_resolve_hold_fixup (struct tlsdesc volatile *td,
				ptrdiff_t entry_check_offset)
{
  /* Maybe we're lucky and can return early.  */
  if (__builtin_return_address (0) - entry_check_offset != td->entry)
    return;

  /* Locking here will stop execution until the running resolver runs
     _dl_tlsdesc_wake_up_held_fixups(), releasing the lock.

     FIXME: We'd be better off waiting on a condition variable, such
     that we didn't have to hold the lock throughout the relocation
     processing.  */
  __rtld_lock_lock_recursive (GL(dl_load_lock));
  __rtld_lock_unlock_recursive (GL(dl_load_lock));
}

#endif /* USE_TLS */

/* Unmap the address range [l_map_start, l_map_end) of MAP and, in
   shared builds with TLS support, free MAP's TLS descriptor hash
   table if it has one.  */
void
_dl_unmap (struct link_map *map)
{
  __munmap ((void *) map->l_map_start, map->l_map_end - map->l_map_start);

#if USE_TLS && SHARED
  /* Calling free() from here is safe: _dl_unmap only runs for
     dlopen()ed libraries, or before the initial relocation has
     completed (where freeing is probably pointless, but harmless).  */
  if (map->l_mach.tlsdesc_table != NULL)
    htab_delete (map->l_mach.tlsdesc_table);
#endif
}
