
Faster Natural Language Processing in Python - julien_c
https://medium.com/huggingface/100-times-faster-natural-language-processing-in-python-ee32033bdced
======
imh
I love seeing Cython usage. Python is cool, but as a scientific language, its
slowness gets in the way sometimes. Particularly when you have control flow
with funny shapes, so it is hard to just apply multipurpose vectorized
functions. In my experience, NLP often has funny shapes like that.

Cython is a wonderful escape hatch to grab performance, and I'd love to see it
used more casually in the data science world.

~~~
__bee
How does Cython compare to other languages such as Go/Rust? Are there any
benchmarks out there?

~~~
dr_zoidberg
Cython is compiled C that uses CPython's objects. If you can distill your
algorithm to a full C(ython) implementation, you get CPython objects + C code,
which is then compiled with the (appropriate version of the) system compiler.

So for example, from this little Cython code:

    
    
        # Return x + 1. Declaring the parameter as `int x` makes Cython convert
        # the incoming Python object to a C int, so the addition is done in C.
        def cy(int x):
            return x + 1
    

You get the following C code:

    
    
        /* Python wrapper
         *
         * Entry point that the interpreter calls for `cy`. It unpacks the single
         * Python argument into a C int and forwards it to the implementation
         * function __pyx_pf_6hworld_cy. Returns the implementation's result, or
         * NULL when the argument cannot be converted.
         */
        static PyObject *__pyx_pw_6hworld_1cy(PyObject *__pyx_self, PyObject *__pyx_arg_x); /*proto*/
        /* Method table entry: exposes the wrapper under the name "cy". METH_O
         * means the function receives exactly one object argument; the trailing
         * 0 is the (absent) docstring pointer. */
        static PyMethodDef __pyx_mdef_6hworld_1cy = {"cy", (PyCFunction)__pyx_pw_6hworld_1cy, METH_O, 0};
        static PyObject *__pyx_pw_6hworld_1cy(PyObject *__pyx_self, PyObject *__pyx_arg_x) {
          int __pyx_v_x;
          PyObject *__pyx_r = 0;
          __Pyx_RefNannyDeclarations
          __Pyx_RefNannySetupContext("cy (wrapper)", 0);
          assert(__pyx_arg_x); {
            /* Convert the Python object to a C int. -1 is also a legitimate
             * value, so it only counts as a failure when an exception is
             * pending; __PYX_ERR is given the error label below (macro body not
             * shown here -- presumably it records the position and jumps). */
            __pyx_v_x = __Pyx_PyInt_As_int(__pyx_arg_x); if (unlikely((__pyx_v_x == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 1, __pyx_L3_error)
          }
          goto __pyx_L4_argument_unpacking_done;
          /* Conversion failed: add a traceback entry and return NULL. */
          __pyx_L3_error:;
          __Pyx_AddTraceback("hworld.cy", __pyx_clineno, __pyx_lineno, __pyx_filename);
          __Pyx_RefNannyFinishContext();
          return NULL;
          /* Argument unpacked successfully: call the implementation. */
          __pyx_L4_argument_unpacking_done:;
          __pyx_r = __pyx_pf_6hworld_cy(__pyx_self, ((int)__pyx_v_x));
    
          /* function exit code */
          __Pyx_RefNannyFinishContext();
          return __pyx_r;
        }
    
        /* Implementation function for `cy`: receives the already-unpacked C int
         * from the wrapper and returns a PyObject *, or NULL on error. Part of
         * the body is elided in this excerpt (see the ellipsis comment), so only
         * the prologue and the exit paths are visible. */
        static PyObject *__pyx_pf_6hworld_cy(CYTHON_UNUSED PyObject *__pyx_self, int __pyx_v_x) {
          PyObject *__pyx_r = NULL;
          __Pyx_RefNannyDeclarations
          __Pyx_RefNannySetupContext("cy", 0);
        /* … */
          /* function exit code */
          /* Error path: release the temporary if one is held, add a traceback
           * entry, and make the result NULL. */
          __pyx_L1_error:;
          __Pyx_XDECREF(__pyx_t_1);
          __Pyx_AddTraceback("hworld.cy", __pyx_clineno, __pyx_lineno, __pyx_filename);
          __pyx_r = NULL;
          /* Common exit: reached by falling through from the error path, and
           * by an explicit goto to this label. */
          __pyx_L0:;
          __Pyx_XGIVEREF(__pyx_r);
          __Pyx_RefNannyFinishContext();
          return __pyx_r;
        }
        /* … */
          /* NOTE(review): the statements below are disjoint excerpts; their
           * enclosing functions are not shown, so the location of each fragment
           * is an assumption to confirm against the full generated file. */
          /* Build a 2-tuple from __pyx_n_s_x (passed twice) and keep it in
           * __pyx_tuple_; a NULL result is treated as an error. */
          __pyx_tuple_ = PyTuple_Pack(2, __pyx_n_s_x, __pyx_n_s_x); if (unlikely(!__pyx_tuple_)) __PYX_ERR(0, 1, __pyx_L1_error)
          __Pyx_GOTREF(__pyx_tuple_);
          __Pyx_GIVEREF(__pyx_tuple_);
        /* … */
          /* Create a callable object from the "cy" method definition and store
           * it in the dictionary __pyx_d under the key __pyx_n_s_cy (presumably
           * the module namespace -- confirm). The temporary reference is dropped
           * once the dictionary holds it. */
          __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_6hworld_1cy, NULL, __pyx_n_s_hworld); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 1, __pyx_L1_error)
          __Pyx_GOTREF(__pyx_t_1);
          if (PyDict_SetItem(__pyx_d, __pyx_n_s_cy, __pyx_t_1) < 0) __PYX_ERR(0, 1, __pyx_L1_error)
          __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
          /* The translated `return x + 1` (looks like the elided body of the
           * implementation function -- confirm): add 1 in C, box the sum as a
           * Python integer, hand ownership of the temporary to the result
           * variable, and jump to the exit label. */
          __Pyx_XDECREF(__pyx_r);
          __pyx_t_1 = __Pyx_PyInt_From_long((__pyx_v_x + 1)); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 2, __pyx_L1_error)
          __Pyx_GOTREF(__pyx_t_1);
          __pyx_r = __pyx_t_1;
          __pyx_t_1 = 0;
          goto __pyx_L0;
    

That compiles and becomes part of a C-extension that you can load with "import
module" from within Python.

