diff --git a/Makefile b/Makefile
index 93327aa93..2f5611586 100644
--- a/Makefile
+++ b/Makefile
@@ -58,6 +58,7 @@ ELFILES = \
 	haskell-indentation.el \
 	haskell-indent.el \
 	haskell-interactive-mode.el \
+	haskell-lexeme.el \
 	haskell-load.el \
 	haskell-menu.el \
 	haskell-mode.el \
diff --git a/haskell-lexeme.el b/haskell-lexeme.el
new file mode 100644
index 000000000..6b292f548
--- /dev/null
+++ b/haskell-lexeme.el
@@ -0,0 +1,223 @@
+;;; haskell-lexeme.el --- haskell lexical tokens   -*- coding: utf-8 -*-
+
+;; Copyright (C) 2015 Gracjan Polak
+
+;; This file is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; This file is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+;;; Commentary:
+
+;;; Code:
+
+(require 'rx)
+
+(unless (category-docstring ?P)
+  (define-category ?P "Haskell symbol constituent characters")
+  (map-char-table
+   #'(lambda (key val)
+       (if (or
+            (and (consp key) (> (car key) 128))
+            (and (numberp key) (> key 128)))
+           (if (member val '(Pc Pd Po Sm Sc Sk So))
+	       (modify-category-entry key ?P))))
+   unicode-category-table)
+
+  (dolist (key (string-to-list "!#$%&*+./<=>?@^|~\\-"))
+    (modify-category-entry key ?P)))
+
+(defconst haskell-lexeme-modid
+  "[[:upper:]][[:alnum:]'_]*"
+  "Regexp matching a valid Haskell module identifier.
+
+Note that GHC accepts Unicode category UppercaseLetter as a first
+character. Following letters are from Unicode categories
+UppercaseLetter, LowercaseLetter, OtherLetter, TitlecaseLetter,
+ModifierLetter, DecimalNumber, OtherNumber, backslash or
+underscore.
+
+Note that this differs from constructor identifier as the latter
+one can have any number of hash character at the end to
+accommodate MagicHash extension.")
+
+(defconst haskell-lexeme-id
+  "[[:alpha:]_][[:alnum:]'_]*#*"
+  "Regexp matching a valid Haskell identifier.
+
+GHC accepts a string starting with any alphabetic character or
+underscore followed by any alphanumeric character or underscore
+or apostrophe.")
+
+(defconst haskell-lexeme-sym
+  "\\(:?\\cP\\|:\\)+"
+  "Regexp matching a valid Haskell variable or constructor symbol.
+
+GHC accepts a string of chars from the set
+[:!#$%&*+./<=>?@^|~\\-] or Unicode category Symbol for chars with
+codes larger than 128 only.")
+
+(defconst haskell-lexeme-modid-opt-prefix
+  (concat "\\(?:" haskell-lexeme-modid "\\.\\)*")
+  "Regexp matching a valid Haskell module prefix, potentially empty.
+
+Module path prefix is separated by dots and finishes with a
+dot. For path component syntax see `haskell-lexeme-modid'.")
+
+(defconst haskell-lexeme-qid-or-qsym
+  (rx-to-string `(: (regexp ,haskell-lexeme-modid-opt-prefix)
+                    (group (| (regexp ,haskell-lexeme-id) (regexp ,haskell-lexeme-sym)
+                              ))))
+  "Regexp matching a valid qualified identifier or symbol.
+
+Note that (match-string 1) returns the unqualified part.")
+
+(defconst haskell-lexeme-qid
+  (rx-to-string `(: (regexp "'*")
+                    (regexp ,haskell-lexeme-modid-opt-prefix)
+                    (group (regexp ,haskell-lexeme-id))))
+  "Regexp matching a valid qualified identifier.
+
+Note that (match-string 1) returns the unqualified part.")
+
+(defconst haskell-lexeme-qsym
+  (rx-to-string `(: (regexp "'*")
+                    (regexp ,haskell-lexeme-modid-opt-prefix)
+                    (group (regexp ,haskell-lexeme-id))))
+  "Regexp matching a valid qualified symbol.
+
+Note that (match-string 1) returns the unqualified part.")
+
+(defconst haskell-lexeme-number
+  (rx (| (: (regexp "[0-9]+\\.[0-9]+") (opt (regexp "[eE][-+]?[0-9]+")))
+         (regexp "[0-9]+[eE][-+]?[0-9]+")
+         (regexp "0[xX][0-9a-fA-F]+")
+         (regexp "0[oO][0-7]+")
+         (regexp "[0-9]+")))
+  "Regexp matching a floating point, decimal, octal or hexadecimal number.
+
+Note that negative sign char is not part of a number.")
+
+(defconst haskell-lexeme-char-literal-inside
+  (rx (| (regexp "[^\n'\\]")
+         (: "\\"
+            (| "a" "b" "f" "n" "r" "t" "v" "\\" "\"" "'"
+               "NUL" "SOH" "STX" "ETX" "EOT" "ENQ" "ACK"
+               "BEL" "BS" "HT" "LF" "VT" "FF" "CR" "SO" "SI" "DLE"
+               "DC1" "DC2" "DC3" "DC4" "NAK" "SYN" "ETB" "CAN"
+               "EM" "SUB" "ESC" "FS" "GS" "RS" "US" "SP" "DEL"
+               (: "^" (regexp "[]A-Z@^_\\[]"))))))
+  "Regexp matching an inside of a character literal.")
+
+(defconst haskell-lexeme-char-literal
+  (rx-to-string `(: "'" (regexp ,haskell-lexeme-char-literal-inside) "'"))
+  "Regexp matching a character literal.")
+
+(defconst haskell-lexeme-string-literal-inside
+  (rx (* (| (regexp "[^\n\"\\]")
+            (: "\\"
+               (| "a" "b" "f" "n" "r" "t" "v" "\\" "\"" "'" "&"
+                  "NUL" "SOH" "STX" "ETX" "EOT" "ENQ" "ACK"
+                  "BEL" "BS" "HT" "LF" "VT" "FF" "CR" "SO" "SI" "DLE"
+                  "DC1" "DC2" "DC3" "DC4" "NAK" "SYN" "ETB" "CAN"
+                  "EM" "SUB" "ESC" "FS" "GS" "RS" "US" "SP" "DEL"
+                  (: "^" (regexp "[]A-Z@^_\\[]"))
+                  (regexp "[ \t\n\r\v\f]*\\\\"))))))
+  "Regexp matching an inside of a string literal.")
+
+(defconst haskell-lexeme-string-literal
+  (rx-to-string `(: "\"" (regexp ,haskell-lexeme-string-literal-inside) "\""))
+  "Regexp matching a string literal.")
+
+(defun haskell-lexeme-classify-by-first-char (char)
+  "Classify token by CHAR.
+
+CHAR is a chararacter that is assumed to be first character of a token."
+  (let ((category (get-char-code-property char 'general-category)))
+
+    (cond
+     ((or (member char '(?! ?# ?$ ?% ?& ?* ?+ ?. ?/ ?< ?= ?> ?? ?@ ?^ ?| ?~ ?\\ ?-))
+          (and (> char 127)
+               (member category '(Pc Pd Po Sm Sc Sk So))))
+      'varsym)
+     ((equal char ?:)
+      'consym)
+     ((equal char ?\')
+      'char)
+     ((equal char ?\")
+      'string)
+     ((member category '(Lu Lt))
+      'conid)
+     ((or (equal char ?_)
+          (member category '(Ll Lo)))
+      'varsym)
+     ((and (>= char ?0) (<= char 9))
+      'number)
+     ((member char '(?\] ?\[ ?\( ?\) ?\{ ?\} ?\` ?\, ?\;))
+      'special))))
+
+(defun haskell-lexeme-looking-at-token ()
+  "Like `looking-at' but understands Haskell lexemes.
+
+Moves point forward over whitespace.  Returns a symbol describing
+type of Haskell token recognized.  Use `match-string',
+`match-beginning' and `match-end' with argument 0 to query match
+result.
+
+Possible results are:
+- 'special: for chars [](){}`,;
+- 'comment: for single line comments
+- 'nested-comment: for multiline comments
+- 'qsymid: for qualified identifiers or symbols
+- 'string: for strings literals
+- 'char: for char literals
+- 'decimal: for decimal, float, hexadecimal and octal number literals
+- 'template-haskell-quote: for a string of apostrophes for template haskell
+
+Note that for qualified symbols (match-string 1) returns the
+unqualified identifier or symbol.  Further qualification for
+symbol or identifier can be done with:
+
+   (haskell-lexeme-classify-by-first-char (char-after (match-beginning 1)))
+
+See `haskell-lexeme-classify-by-first-char' for details."
+  (skip-syntax-forward "->")
+  (let
+      ((case-fold-search nil)
+       (point (point-marker)))
+    (or
+     (and (looking-at "{-")
+          (progn
+            (save-excursion
+              (forward-comment 1)
+              (set-match-data (list point (point-marker))))
+            'nested-comment))
+     (and (looking-at haskell-lexeme-char-literal)
+          'char)
+     (and (looking-at haskell-lexeme-string-literal)
+          'string)
+     (and (looking-at "[][(){}`,;]")
+          'special)
+     (and (looking-at haskell-lexeme-qid-or-qsym)
+          (if (and (eq (- (match-end 0) (match-beginning 0)) 2)
+                   (equal (match-string 0) "--"))
+              (progn
+                (set-match-data (list point (set-marker (make-marker) (line-end-position))))
+                'comment)
+            'qsymid))
+     (and (looking-at haskell-lexeme-number)
+          'number)
+     (and (looking-at "'+")
+          'template-haskell-quote))))
+
+(provide 'haskell-lexeme)
+
+;;; haskell-lexeme.el ends here
diff --git a/tests/haskell-lexeme-tests.el b/tests/haskell-lexeme-tests.el
new file mode 100644
index 000000000..b6fc76faa
--- /dev/null
+++ b/tests/haskell-lexeme-tests.el
@@ -0,0 +1,197 @@
+;; unit tests for haskell-string.el
+
+(require 'ert)
+(require 'haskell-lexeme) ;; implementation under test
+(require 'haskell-mode)
+(require 'haskell-font-lock)
+
+(defun check-lexemes (lines lexemes)
+  "Checks if tokenization works as expected.
+
+LINES is a list of strings that will be inserted to a new
+buffer. Then LEXEMES is a list of lexemes that should be found in
+order."
+  (when (get-buffer "*haskell-mode-buffer*")
+    (kill-buffer "*haskell-mode-buffer*"))
+  (save-current-buffer
+    (set-buffer (get-buffer-create "*haskell-mode-buffer*"))
+
+    ;; Note that all of this should work both in haskell-mode and
+    ;; outside of it. Currently we test only haskell-mode setup.
+    (haskell-mode)
+    (font-lock-fontify-buffer)
+
+    (dolist (line lines)
+      (insert line)
+      (insert "\n"))
+
+    (goto-char (point-min))
+    (let (current-token
+          (left-lexemes lexemes))
+      (while (haskell-lexeme-looking-at-token)
+        (setq current-token (match-string-no-properties 0))
+        ;; it should be in the list of lexemes at all
+        (should (member current-token lexemes))
+        ;; it should be next in the list
+        (should (equal (car left-lexemes) current-token))
+        (setq left-lexemes (cdr left-lexemes))
+        (goto-char (match-end 0)))
+      (should (equal nil left-lexemes)))))
+
+
+(ert-deftest haskell-lexeme-basic-tokens-1 ()
+  "Get some basic self delimiting tokens right"
+  (check-lexemes
+   '(")(}{][,;;")
+   '(")" "(" "}" "{" "]" "[" "," ";" ";")))
+
+(ert-deftest haskell-lexeme-qid-1 ()
+  "Indentifiers"
+  (check-lexemes
+   '("head,at_first,safeHead;Data")
+   '("head" "," "at_first" "," "safeHead" ";" "Data")))
+
+(ert-deftest haskell-lexeme-qid-2 ()
+  "Operators (symbols)"
+  (check-lexemes
+   '(">>=,---->,<-;::::")
+   '(">>=" "," "---->" "," "<-" ";" "::::")))
+
+(ert-deftest haskell-lexeme-qid-3 ()
+  "Qualified Indentifiers"
+  (check-lexemes
+   '("Data.List.head,Modu.at_first,Zonk.safeHead;Data.Data")
+   '("Data.List.head" "," "Modu.at_first" "," "Zonk.safeHead" ";" "Data.Data")))
+
+(ert-deftest haskell-lexeme-qid-4 ()
+  "Qualified Operators (symbols)"
+  (check-lexemes
+   '("Monad.>>=,Comment.---->,Func.<-;Cons.::::;Category..")
+   '("Monad.>>=" "," "Comment.---->" "," "Func.<-" ";" "Cons.::::" ";" "Category..")))
+
+(ert-deftest haskell-lexeme-unicode-ids-1 ()
+  "Unicode ids"
+  (check-lexemes
+   '("Żółw.head,Data.żółw,Артур.Артур")
+   '("Żółw.head" "," "Data.żółw" "," "Артур.Артур")))
+
+(ert-deftest haskell-lexeme-unicode-syms-1 ()
+  "Unicode symbols"
+  (check-lexemes
+   '("∷∷,.→,Control.Monad.★★")
+   '("∷∷" "," ".→" "," "Control.Monad.★★")))
+
+
+(ert-deftest haskell-lexeme-spaces ()
+  (check-lexemes
+   '("fun::C.Monad a -> ()"
+     "fun=do { xyz <- abc <*> def; xyz }")
+   '("fun" "::" "C.Monad" "a" "->" "(" ")"
+     "fun" "=" "do" "{" "xyz" "<-" "abc" "<*>" "def" ";" "xyz" "}")))
+
+(ert-deftest haskell-lexeme-japanese-is-treated-as-lowercase ()
+  (check-lexemes
+   '("てすと3 ∷ IO ()"
+     "てすと3 = do"
+     "    putStrLn $ show 人間虫 where"
+     "        人間虫 = x123")
+   '("てすと3" "∷" "IO" "(" ")"
+     "てすと3" "=" "do"
+     "putStrLn" "$" "show" "人間虫" "where"
+     "人間虫" "=" "x123")))
+
+(ert-deftest haskell-lexeme-modifier-letters ()
+  (check-lexemes
+   '("xᵦ xᵦxᵦ xxx###")
+   '("xᵦ" "xᵦxᵦ" "xxx###")))
+
+(ert-deftest haskell-lexeme-char-literal-1 ()
+  (check-lexemes
+   '("'\\ENQ'")
+   '("'\\ENQ'")))
+
+(ert-deftest haskell-lexeme-char-literal-2 ()
+  (check-lexemes
+   '("'\\''")
+   '("'\\''")))
+
+(ert-deftest haskell-lexeme-char-literal-3 ()
+  (check-lexemes
+   '("'\"'")
+   '("'\"'")))
+
+(ert-deftest haskell-lexeme-char-literal-4 ()
+  (check-lexemes
+   '("'D'")
+   '("'D'")))
+
+(ert-deftest haskell-lexeme-string-literal-1 ()
+  (check-lexemes
+   '("\"\\   \\\"")
+   '("\"\\   \\\"")))
+
+(ert-deftest haskell-lexeme-string-literal-1a ()
+  (check-lexemes
+   '("\"\\ \n  \\\"")
+   '("\"\\ \n  \\\"")))
+
+(ert-deftest haskell-lexeme-string-literal-2 ()
+  (check-lexemes
+   '("\"\"")
+   '("\"\"")))
+
+(ert-deftest haskell-lexeme-string-literal-3 ()
+  (check-lexemes
+   '("\"foobar\"")
+   '("\"foobar\"")))
+
+(ert-deftest haskell-lexeme-string-literal-4 ()
+  (check-lexemes
+   '("\"\\^Z\"")
+   '("\"\\^Z\"")))
+
+(ert-deftest haskell-lexeme-string-literal-5 ()
+  (check-lexemes
+   '("\"\\\\\"")
+   '("\"\\\\\"")))
+
+(ert-deftest haskell-lexeme-string-literal-6 ()
+  (check-lexemes
+   '("\"\\ENQ\"")
+   '("\"\\ENQ\"")))
+
+(ert-deftest haskell-lexeme-string-literal-7 ()
+  (check-lexemes
+   '("\"\\\\\"")
+   '("\"\\\\\"")))
+
+(ert-deftest haskell-lexeme-line-comment-1 ()
+  (check-lexemes
+   '("   -- x  "
+     " --%% cons"
+     " -- cons"
+     )
+   '("-- x  "
+     "--%%" "cons"
+     "-- cons"
+     )))
+
+(ert-deftest haskell-lexeme-template-haskell-1 ()
+  (check-lexemes
+   '("  'C  ''C 'x ''x 0x12'x'xx")
+   '("'" "C" "''" "C" "'" "x" "''" "x" "0x12" "'x'" "xx")))
+
+(ert-deftest haskell-lexeme-decimal-numbers-1 ()
+  (check-lexemes
+   '("123+345-123412")
+   '("123" "+" "345" "-" "123412")))
+
+(ert-deftest haskell-lexeme-octal-hexadecimal-numbers-1 ()
+  (check-lexemes
+   '("0o123+0xaf345-0O121 0X234523fff")
+   '("0o123" "+" "0xaf345" "-" "0O121" "0X234523fff")))
+
+(ert-deftest haskell-lexeme-float-numbers-1 ()
+  (check-lexemes
+   '("0.12 34.22.33 1e+23 1e23 1e+33 455.33E1456.4")
+   '("0.12" "34.22" "." "33" "1e+23" "1e23" "1e+33" "455.33E1456" "." "4")))