/* Version identification string holding the expanded RCS Header keyword of this file. */
static char RcsId[] = "@(#)$Header: gettrans.c,v 8.18 2000/11/12 13:24:03 goetz Rel $";

/*+*******************************************************************

 File:          gettrans.c

 Project:       Device Servers with SUN-RPC

 Description:   get transient program number  
		gettransient    -> Old version. Reserves port only for one
				   protocol.
		gettransient_ut -> New version. Reserves port for UDP and
				   TCP protocol.
 Author(s):     Jens Meyer

 Original:      09.01.1991


 $Revision: 8.18 $
 $Date: 2000/11/12 13:24:03 $

 $Author: goetz $

 $Log:	gettrans.c,v $
 * Revision 8.18  2000/11/12  13:24:03  13:24:03  goetz (Andy Goetz)
 * added dynamic error handling; improved TANGO error treatment
 * 
 Revision 8.17  2000/09/26 13:36:31  goetz
 tango_dev_putget() and tango_dev_putget_raw() implement immediate reconnection

 * Revision 8.16  2000/09/25  14:50:16  14:50:16  goetz (Andy Goetz)
 * tango_api implements TANGO_HOST, stateless import and reimport
 * 
 * Revision 8.15  20/0./7.  1.:6.:9.  1.:6.:9.  goetz (Andy Goetz)
 * changed from tango database to host mysql, added support for float and double arrays
 * 
 * Revision 8.14  20/0./6.  2.:4.:0.  2.:4.:0.  goetz (Andy Goetz)
 * fixed bug in dev_event_listen() which caused server to crash
 * 
 Revision 8.13  2000/06/02 21:56:46  goetz
 _DEVICE_H now _TANGO_DEVICE_H in Device.H; (char*)taco_tango

 Revision 8.12  2000/06/02 15:58:04  goetz
 dev_event_fire() now device specific; ported to SuSE V6.4

 Revision 8.11  2000/05/31 07:47:51  goetz
 tango_api has local copies of argc and argv, ported to HP-UX

 * Revision 8.10  2000/05/29  21:39:17  21:39:17  goetz (Andrew GOETZ)
 * fixed prototyping problems with C++ on HP-UX
 * 
 * Revision 8.9  2000/05/29  18:13:27  18:13:27  goetz (Andrew GOETZ)
 * fixed bug in dev_import_timeout() which prevented timeouts < 2s
 * 
 Revision 8.8  2000/05/02 15:32:11  goetz
 added prototype for tsleep() for OS9 C++

 * Revision 8.7  2000/05/02  14:19:33  14:19:33  goetz (Andy Goetz)
 * removed check for device server running on different host
 * 
 * Revision 8.6  2000/05/02  13:29:26  13:29:26  goetz (Andy Goetz)
 * added random sleep to gettransient + 3 retries
 * 
 Revision 8.5  2000/03/27 17:13:45  goetz
 declared pmap_getmaps() as external C function for OS9 C++

 Revision 8.4  2000/03/13 14:50:02  goetz
 import timeout now programmable using dev_import_timeout()

 Revision 8.3  2000/03/13 10:31:35  goetz
 added Jens' modifications to fix bug in local putget

 Revision 8.2  2000/03/10 17:01:23  goetz
 dev_synch() now excludes udp clients and new imports

 Revision 8.1  2000/01/18 16:48:45  goetz
 tango_dev_import() now only called in dev_import if -DTANGO

 Revision 8.0  1999/12/28 14:18:31  goetz
 added TANGO support for TACO dev_xxx() calls via -DTANGO for C++

 Revision 7.10  1999/11/25 08:27:13  goetz
 replaced fprintf(stderr) with printf; made startup() C++ compatible

 Revision 7.9  1999/11/22 20:18:59  goetz
 removed const from gettransient() prototype

 Revision 7.8  1999/11/21 20:45:37  goetz
 included all M.Diehl's patches (major changes to gettransient() + main())

 Revision 7.4  1999/07/09 05:15:27  goetz
 added M.Diehl's patch to DevServerSig.c to exit() after calling unregister_server()

 Revision 7.3  1999/06/07 15:26:58  goetz
 fixed bug with multi-nethost reimport, device name stored with nethost

 Revision 7.2  1999/05/12 15:21:58  goetz
 changed dev_event_fire() to void; fixed bug in dev_event_unlisten()

 * Revision 7.1  99/05/11  15:59:46  15:59:46  goetz (Andy Goetz)
 * replace static declaration of event_client[] array by malloc()
 * 
 Revision 7.0  1999/04/26 07:30:39  goetz
 implemented user events (added event_api.c)

 Copyright (c) 1990-1997 by European Synchrotron Radiation Facility,
                            Grenoble, France

********************************************************************-*/

#include <API.h>
#include <ApiP.h>

#include <Admin.h>

#ifdef unix
#include <rpc/pmap_clnt.h>
#include <rpc/pmap_prot.h>
#include <sys/socket.h>
#elif defined _NT
#include <pmapprot.h>
#include <pmapclnt.h>
#endif

#if ( OSK | _OSK )
#ifdef __cplusplus
extern "C" {
#endif
extern struct pmaplist *pmap_getmaps(struct sockaddr_in *address); 
extern unsigned int tsleep(unsigned int);
#ifdef __cplusplus
}
#endif
#include <rpc/pmap_clnt.h>
#include <rpc/pmap_prot.h>
#include <inet/socket.h>
#endif /* OSK | _OSK */

#ifdef vxworks
#include <taskLib.h>
#endif /* vxworks */

#ifdef _XOPEN_SOURCE_EXTENDED
#include <netinet/in.h>
#include <arpa/inet.h>
#endif /* XOPEN_SOURCE_EXTENDED */

#ifdef __hpux
#ifdef __cplusplus
extern "C" {
#endif
extern void get_myaddress (struct sockaddr_in *);
#ifdef __cplusplus
}
#endif
#endif /* hpux */

#include <stdlib.h>

/*+=======================================================================

A. Gotz,  27.04.2000

Problems with M.Diehl's gettransient() call :

* On OS9 if multiple programs (e.g. device servers or even rpcinfo) call 
  pmap_getmaps() simultaneously the portmapper dies ! 
* No protection against multiple programs getting the same program number
  and then trying to register it. 

In order to solve this I have added the following patch :

* A random sleep (using rand()) every time gettransient is called 
  will reduce the chances of device servers calling pmap_getmaps() at
  the same time thereby reducing the probability of the portmapper
  crashing on OS9** and device servers getting the same pmap list.

* The svc_register routine will make multiple attempts to get a
  free program number i.e. multiple calls to gettransient.

** The OS9 problem is still there but by introducing sleeps in
   the startup procedures of the device servers the problem can be avoided.
   This solution has been preferred to using semaphores to solve
   the problem because semaphores can block forever if a process
   crashes and because the problem is limited to OS9 (sigh).
   
M. Diehl, 15.11.99

Complete reimplementation of the transient RPC prognum allocation scheme :

* No need to bind sockets and set/unset prognums (and for the corresponding
  patches) anymore
* identical transient number hashing for real device servers, asynch.
  clients and the message/database server - i.e. only one function
* no collisions between database and message server during program number
  allocation. This was catching us in a race condition leading to the
  portmapper fork problem - the portmapper-patch for SuSE was apparently
  helpful only due to timing side-effects!
* selecting only prognums for which absolutely no version/proto combination
  has been registered before. This means, that exactly one call to
  gettransient() is required for every Taco service user.
* Fixing some inconsistencies in prognum/version handling by the asynch api,
  if called from a real device server, which has already registered the
  normal sync api services.
* BUT: still some (extremely small, however) chance for a race condition,
  if a number of servers is simultaneously started by some starting program
  and any 2 of them come up with the same hashvalue and for those a
  scheduler switch happens between the calls to gettransient() and the first
  svc_register(). This is extremely unlikely and the chance for such a
  collision could be reduced beyond any reasonable limit, if the starting
  program sleeps for, say some 100msec, between starting of any two
  programs. To make this absolutely failsafe one has to use a host-wide
  atomic locking of the critical section (using SYSV-IPC semaphores e.g.).
  Using the version 1/udp portmapper registration as a lock is not a good
  solution not only because the portmapper forks, but also because
  version 1/udp is needed for database and message server forcing us
  to unregister it again for some short moment - allowing the race
  condition between those 2 programs. This is a real issue since the
  old implementation started trying at the same prognum base. How critical
  this is was demonstrated by the influence of the portmap daemon compiling
  options (read: timing due to additional security checks!).
=========================================================================-*/


int gettransient( const char *ds_name )
{
#ifdef _NT
	LPpmaplist        plist, p;
#else
        struct pmaplist   *plist, *p;
#endif
	u_long	base=0x50000000, key=131, maxoffset=0x10000000;
	u_long  offset, prognum, i, found;
	struct sockaddr_in addr;
#ifdef _UCC
	long backoff;
#else
	struct timespec backoff;
#endif
	long pid, first=1;

	if (first)
	{
#if defined (_NT)
        	pid = _getpid ();
#else
#if !defined (vxworks)
        	pid = getpid ();
#else  /* !vxworks */
        	pid = taskIdSelf ();
#endif /* !vxworks */
#endif
		srand(pid);
		first = 0;
	}
/*
 * before doing anything first backoff a random fraction of a second
 * this will help reduce the probability that multiple servers interrogate
 * the portmapper at exactly the same time cf. above
 */
#ifdef _UCC
	backoff = 256.*(float)rand()/(float)RAND_MAX;
	tsleep(0x80000000|backoff);
#else
	backoff.tv_sec = 0;
	backoff.tv_nsec = 1000000000.*(float)rand()/(float)RAND_MAX;
	nanosleep(&backoff, NULL);
#endif

/*
 * determine the offset from base transient program number using a hashing
 * function to calculate an (almost) unique code based on the device
 * server full name (server/personal name). Because the personal
 * name (server/personal name) is unique in a TACO control system
 * the offset will be unique. The hashing function used is very similar
 * to the so-called "coalesced hashing" function also used by E.Taurel
 * in the data collector. It has been simplified here to the following
 *
 * hashing code = sum ( ds_name[i] * 131^i ) modulo 0x10000000
 *
 * The modulo is necessary because transient program numbers must lie 
 * between 40000000 and 5fffffff and to avoid confusion with old servers
 * a base of 0x50000000 has been used. 
 *
 * andy 12jun97
 */


	offset = 0;
	for (i=0; i<strlen(ds_name); i++)
	{
		offset = (offset*key + (u_long)ds_name[i])%maxoffset;
	}

	prognum = base + offset;

/*
 * Because the hashing function is not unique a check must be made for
 * collisions. This is done by looping round testing to see if the program 
 * number is already bound until an unbound program number is found or
 * until all program numbers have been checked.
 */

        get_myaddress(&addr);
	plist = pmap_getmaps(&addr);
	if(NULL == plist)
	{
	  printf("gettransient(): pmap_getmaps() failed , aborting !\n");
	  return 0;
	}
  	found = 0;
        do
        {
          if( prognum >= (base+maxoffset-1) )
	    prognum = base;

	  for(p = plist; p != NULL; p = p->pml_next )
	  {
            if( prognum == p->pml_map.pm_prog )
              break;
          }
          if( p == NULL )
	  {
	    found = 1;
            break;
	  }
	  prognum++;
 	} while( prognum != (base+offset));

	if (found == 0)
	{
	  printf("gettransient(): failed to find free prognum = %d\n", prognum);
	  return 0;
	}
 	
        /*xdr_free((xdrproc_t)xdr_pmap,(char *)plist);*/

        return prognum;
}
