Main Page | Alphabetical List | Class List | File List | Class Members | File Members | Related Pages

fast_convert.h

00001 /*
00002  * SpanDSP - a series of DSP components for telephony
00003  *
00004  * fast_convert.h - Quick ways to convert floating point numbers to integers
00005  *
00006  * Written by Steve Underwood <steveu@coppice.org>
00007  *
00008  * Copyright (C) 2009 Steve Underwood
00009  *
00010  * All rights reserved.
00011  *
00012  * This program is free software; you can redistribute it and/or modify
00013  * it under the terms of the GNU Lesser General Public License version 2.1,
00014  * as published by the Free Software Foundation.
00015  *
00016  * This program is distributed in the hope that it will be useful,
00017  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00018  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00019  * GNU Lesser General Public License for more details.
00020  *
00021  * You should have received a copy of the GNU Lesser General Public
00022  * License along with this program; if not, write to the Free Software
00023  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
00024  *
00025  * $Id: fast_convert.h,v 1.6 2009/02/26 16:08:51 steveu Exp $
00026  */
00027 
00028 #if !defined(_SPANDSP_FAST_CONVERT_H_)
00029 #define _SPANDSP_FAST_CONVERT_H_
00030 
00031 #if defined(__cplusplus)
00032 extern "C"
00033 {
00034 #endif
00035 
00036 /* The following code, to handle issues with lrint() and lrintf() on various
00037  * platforms, is adapted from similar code in libsndfile, which is:
00038  *
00039  * Copyright (C) 2001-2004 Erik de Castro Lopo <erikd@mega-nerd.com>
00040  *
00041  * This program is free software; you can redistribute it and/or modify
00042  * it under the terms of the GNU Lesser General Public License as published by
00043  * the Free Software Foundation; either version 2.1 of the License, or
00044  * (at your option) any later version.
00045  *
00046  * This program is distributed in the hope that it will be useful,
00047  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00048  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00049  * GNU Lesser General Public License for more details.
00050  */
00051 
00052 /*
00053  *    On Intel Pentium processors (especially PIII and probably P4), converting
00054  *    from float to int is very slow. To meet the C specs, the code produced by
00055  *    most C compilers targeting Pentium needs to change the FPU rounding mode
00056  *    before the float to int conversion is performed.
00057  *
00058  *    Changing the FPU rounding mode causes the FPU pipeline to be flushed. It
00059  *    is this flushing of the pipeline which is so slow.
00060  *
00061  *    Fortunately the ISO C99 specification defines the functions lrint, lrintf,
00062  *    llrint and llrintf which fix this problem as a side effect.
00063  *
00064  *    On Unix-like systems, the configure process should have detected the
00065  *    presence of these functions. If they weren't found we have to replace them
00066  *    here with a standard C cast.
00067  */
00068 
00069 /*
00070  *    The C99 prototypes for these functions are as follows:
00071  *
00072  *        int rintf(float x);
00073  *        int rint(double x);
00074  *        long int lrintf(float x);
00075  *        long int lrint(double x);
00076  *        long long int llrintf(float x);
00077  *        long long int llrint(double x);
00078  *
00079  *    The presence of the required functions are detected during the configure
00080  *    process and the values HAVE_LRINT and HAVE_LRINTF are set accordingly in
00081  *    the config file.
00082  */
00083 
00084 #if defined(__CYGWIN__)
00085 #if !defined(__cplusplus)
00086     /*
00087      *    CYGWIN has lrint and lrintf functions, but they are slow and buggy:
00088      *        http://sourceware.org/ml/cygwin/2005-06/msg00153.html
00089      *        http://sourceware.org/ml/cygwin/2005-09/msg00047.html
00090      *    The latest version of cygwin seems to have made no effort to fix this.
00091      *    These replacement functions (pulled from the Public Domain MinGW
00092      *    math.h header) replace the native versions.
00093      */
00094     static __inline__ long int lrint(double x)
00095     {
00096         long int retval;
00097 
00098         __asm__ __volatile__
00099         (
00100             "fistpl %0"
00101             : "=m" (retval)
00102             : "t" (x)
00103             : "st"
00104         );
00105 
00106         return retval;
00107     }
00108 
00109     static __inline__ long int lrintf(float x)
00110     {
00111         long int retval;
00112 
00113         __asm__ __volatile__
00114         (
00115             "fistpl %0"
00116             : "=m" (retval)
00117             : "t" (x)
00118             : "st"
00119         );
00120         return retval;
00121     }
00122 #endif
00123 
00124     /* The fastest way to convert is the equivalent of lrint() */
00125     static __inline__ long int lfastrint(double x)
00126     {
00127         long int retval;
00128 
00129         __asm__ __volatile__
00130         (
00131             "fistpl %0"
00132             : "=m" (retval)
00133             : "t" (x)
00134             : "st"
00135         );
00136 
00137         return retval;
00138     }
00139 
00140     static __inline__ long int lfastrintf(float x)
00141     {
00142         long int retval;
00143 
00144         __asm__ __volatile__
00145         (
00146             "fistpl %0"
00147             : "=m" (retval)
00148             : "t" (x)
00149             : "st"
00150         );
00151         return retval;
00152     }
00153 #elif defined(__GNUC__)
00154 
00155 #if defined(__i386__)
00156     /* These routines are guaranteed fast on an i386 machine. Using the built in
00157        lrint() and lrintf() should be similar, but they may not always be enabled.
00158        Sometimes, especially with "-O0", you might get slow calls to routines. */
00159     static __inline__ long int lfastrint(double x)
00160     {
00161         long int retval;
00162 
00163         __asm__ __volatile__
00164         (
00165             "fistpl %0"
00166             : "=m" (retval)
00167             : "t" (x)
00168             : "st"
00169         );
00170 
00171         return retval;
00172     }
00173 
00174     static __inline__ long int lfastrintf(float x)
00175     {
00176         long int retval;
00177 
00178         __asm__ __volatile__
00179         (
00180             "fistpl %0"
00181             : "=m" (retval)
00182             : "t" (x)
00183             : "st"
00184         );
00185         return retval;
00186     }
00187 #elif defined(__x86_64__)
00188     /* On an x86_64 machine, the fastest thing seems to be a pure assignment from a
00189        double or float to an int. It looks like the design on the x86_64 took account
00190        of the default behaviour specified for C. */
00191     static __inline__ long int lfastrint(double x)
00192     {
00193         return (long int) (x);
00194     }
00195 
00196     static __inline__ long int lfastrintf(float x)
00197     {
00198         return (long int) (x);
00199     }
00200 #elif defined(__ppc__)  ||   defined(__powerpc__)
00201     static __inline__ long int lfastrint(register double x)
00202     {
00203         int res[2];
00204 
00205         __asm__ __volatile__
00206         (
00207             "fctiw %1, %1\n\t"
00208             "stfd %1, %0"
00209             : "=m" (res)    /* Output */
00210             : "f" (x)       /* Input */
00211             : "memory"
00212         );
00213 
00214         return res[1];
00215     }
00216 
00217     static __inline__ long int lfastrintf(register float x)
00218     {
00219         int res[2];
00220 
00221         __asm__ __volatile__
00222         (
00223             "fctiw %1, %1\n\t"
00224             "stfd %1, %0"
00225             : "=m" (res)    /* Output */
00226             : "f" (x)       /* Input */
00227             : "memory"
00228         );
00229 
00230         return res[1];
00231     }
00232 #endif
00233 
00234 #elif defined(_M_IX86)
00235     /* Visual Studio i386 */
00236     /*
00237      *    Win32 doesn't seem to have the lrint() and lrintf() functions.
00238      *    Therefore implement inline versions of these functions here.
00239      */
00240 
00241     __inline long int lrint(double x)
00242     {
              /* Stand-in for lrint() on 32 bit Visual Studio builds.
                 "fld x" pushes x onto the x87 stack and "fistp i" stores it into
                 i as an integer and pops it again. No rounding mode is selected
                 here, so whatever mode the FPU is currently in applies. */
00243         long int i;
00244 
00245         _asm
00246         {
00247             fld x
00248             fistp i
00249         };
00250         return i;
00251     }
00252 
00253     __inline long int lrintf(float x)
00254     {
              /* Stand-in for lrintf() on 32 bit Visual Studio builds.
                 "fld x" pushes x onto the x87 stack and "fistp i" stores it into
                 i as an integer and pops it again. No rounding mode is selected
                 here, so whatever mode the FPU is currently in applies. */
00255         long int i;
00256 
00257         _asm
00258         {
00259             fld x
00260             fistp i
00261         };
00262         return i;
00263     }
00264 
00265     __inline float rintf(float flt)
00266     {
              /* Stand-in for rintf() on 32 bit Visual Studio builds: "fld flt"
                 pushes the argument onto the x87 stack and "frndint" rounds the
                 stack top to an integral value in place.
                 NOTE(review): there is no return statement; this presumably
                 relies on the compiler taking the value left on the x87 stack
                 top as the function result - confirm against the MSVC inline
                 assembler documentation. */
00267         _asm
00268         {       fld flt
00269                 frndint
00270         }
00271     }
00272 
00273     __inline double rint(double dbl)
00274     {
              /* Stand-in for rint() on 32 bit Visual Studio builds: "fld dbl"
                 pushes the argument onto the x87 stack and "frndint" rounds the
                 stack top to an integral value in place.
                 NOTE(review): there is no return statement; this presumably
                 relies on the compiler taking the value left on the x87 stack
                 top as the function result - confirm against the MSVC inline
                 assembler documentation. */
00275         _asm 
00276         {
00277             fld dbl
00278             frndint
00279         }
00280     }
00281 
00282     __inline long int lfastrint(double x)
00283     {
              /* Fast double to integer conversion for 32 bit Visual Studio
                 builds. The body is the same fld/fistp sequence as this
                 branch's lrint(): push x onto the x87 stack, then store it into
                 i as an integer and pop it. */
00284         long int i;
00285 
00286         _asm
00287         {
00288             fld x
00289             fistp i
00290         };
00291         return i;
00292     }
00293 
00294     __inline long int lfastrintf(float x)
00295     {
              /* Fast float to integer conversion for 32 bit Visual Studio
                 builds. The body is the same fld/fistp sequence as this
                 branch's lrintf(): push x onto the x87 stack, then store it
                 into i as an integer and pop it. */
00296         long int i;
00297 
00298         _asm
00299         {
00300             fld x
00301             fistp i
00302         };
00303         return i;
00304     }
00305 #elif defined(_M_X64)
00306     /* Visual Studio x86_64 */
00307     /* x86_64 machines will do best with a simple assignment. */
00308 #include <intrin.h>
00309 
00310     __inline long int lrint(double x)
00311     {
00312                 return (long int)_mm_cvtsd_si64x( _mm_loadu_pd ((const double*)&x) );
00313     }
00314 
00315     __inline long int lrintf(float x)
00316     {
00317                 return _mm_cvt_ss2si( _mm_load_ss((const float*)&x) );
00318     }
00319 
00320     __inline long int lfastrint(double x)
00321     {
00322         return (long int) (x);
00323     }
00324 
00325     __inline long int lfastrintf(float x)
00326     {
00327         return (long int) (x);
00328     }
00329 #elif defined(__MWERKS__)  &&  defined(macintosh)
00330     /* This MacOS 9 solution was provided by Stephane Letz */
00331 
00332     long int __inline__ lfastrint(register double x)
00333     {
              /* Fast double to integer conversion for this compiler's PowerPC
                 inline assembler. "fctiw x, x" converts x to an integer in
                 place, "stfd x, res" stores the register image over res[], and
                 the second element of res[] is returned.
                 NOTE(review): this presumably assumes that long int is 4 bytes
                 (so res[] spans the 8 byte store) and that the target is big
                 endian (so the integer is in res[1]) - confirm. */
00334         long int res[2];
00335 
00336         asm
00337         {
00338             fctiw x, x
00339             stfd x, res
00340         }
00341         return res[1];
00342     }
00343 
00344     long int __inline__ lfastrintf(register float x)
00345     {
              /* Fast float to integer conversion for this compiler's PowerPC
                 inline assembler. "fctiw x, x" converts x to an integer in
                 place, "stfd x, res" stores the register image over res[], and
                 the second element of res[] is returned.
                 NOTE(review): this presumably assumes that long int is 4 bytes
                 (so res[] spans the 8 byte store) and that the target is big
                 endian (so the integer is in res[1]) - confirm. */
00346         long int res[2];
00347 
00348         asm
00349         {
00350             fctiw x, x
00351             stfd x, res
00352         }
00353         return res[1];
00354     }
00355 #elif defined(__MACH__)  &&  defined(__APPLE__)  &&  (defined(__ppc__)  ||  defined(__powerpc__))
00356     /* For Apple Mac OS/X - do recent versions still need this? */
00357 
00358     static __inline__ long int lfastrint(register double x)
00359     {
00360         int res[2];
00361 
00362         __asm__ __volatile__
00363         (
00364             "fctiw %1, %1\n\t"
00365             "stfd %1, %0"
00366             : "=m" (res)    /* Output */
00367             : "f" (x)       /* Input */
00368             : "memory"
00369         );
00370 
00371         return res[1];
00372     }
00373 
00374     static __inline__ long int lfastrintf(register float x)
00375     {
00376         int res[2];
00377 
00378         __asm__ __volatile__
00379         (
00380             "fctiw %1, %1\n\t"
00381             "stfd %1, %0"
00382             : "=m" (res)    /* Output */
00383             : "f" (x)       /* Input */
00384             : "memory"
00385         );
00386 
00387         return res[1];
00388     }
00389 #else
00390     /* There is nothing else to do, but use a simple casting operation, instead of a real
00391        rint() type function. Since we are only trying to use rint() to speed up conversions,
00392        the accuracy issues related to changing the rounding scheme are of little concern
00393        to us. */
00394 
00395     #if !defined(__sgi)
00396         #warning "No usable lrint() and lrintf() functions available."
00397         #warning "Replacing these functions with a simple C cast."
00398     #endif
00399 
00400     static __inline__ long int lrint(double x)
00401     {
00402         return (long int) (x);
00403     }
00404 
00405     static __inline__ long int lrintf(float x)
00406     {
00407         return (long int) (x);
00408     }
00409 
00410     static __inline__ long int lfastrint(double x)
00411     {
00412         return (long int) (x);
00413     }
00414 
00415     static __inline__ long int lfastrintf(float x)
00416     {
00417         return (long int) (x);
00418     }
00419 #endif
00420 
00421 #if defined(__cplusplus)
00422 }
00423 #endif
00424 
00425 #endif
00426 
00427 /*- End of file ------------------------------------------------------------*/

Generated on Fri Aug 28 20:12:26 2009 for spandsp by  doxygen 1.3.9.1