;;; merd-mode.el --- Major mode for editing Merd programs

;; user definable variables
;; vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv

(defcustom me-indent-offset 4
  "*Amount of offset per level of indentation"
  :type 'integer
  :group 'merd)


(defcustom me-backspace-function 'backward-delete-char-untabify
  "*Function called by `me-electric-backspace' when deleting backwards."
  :type 'function
  :group 'merd)


(defcustom me-delete-function 'delete-char
  "*Function called by `me-electric-delete' when deleting forwards."
  :type 'function
  :group 'merd)

(defvar me-honor-comment-indentation t)
(defvar me-align-multiline-strings-p t)
(defvar me-block-comment-prefix "# ")



;; ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
;; NO USER DEFINABLE VARIABLES BEYOND THIS POINT

(defvar bident-regexp "[a-zA-Z0-9_]*[?!']*")
(defvar ident-regexp (concat "[a-z_]" bident-regexp))
(defvar constr-regexp (concat "[A-Z]" bident-regexp))
(defvar fqident-regexp (concat "\\(" constr-regexp "\\.\\)+" ident-regexp))
(defvar fqconstr-regexp (concat "\\(" constr-regexp "\\.\\)*" constr-regexp))

(defconst me-outdent-keywords
  '("return" "raise" "break" "next"))
(defconst me-intdent-keywords-cont
  '("then" "else" "with"))
(defconst me-intdent-keywords (append me-intdent-keywords-cont 
  '("if" "while" "try" "do" "cond")))
(defconst me-closing-aroundblock
  "[)}]\\|\\]")

(defvar merd-font-lock-keywords
  (let ((kw1 (mapconcat 'identity
			(append me-outdent-keywords me-intdent-keywords '("and" "or"))
			"\\|"))
	)
    (list
     ;; keywords
     (cons (concat "\\b\\(" kw1 "\\)\\b[ \n\t(]") 1)
     (cons (concat "\\(:=\\|\\?:\\|[-=<:]+>\\)") 1)
     ;; functions
     (cons (concat "\\b\\(" fqident-regexp "\\)\\b") '(1 font-lock-function-name-face))
     (cons (concat "\\b\\(" fqconstr-regexp "\\)\\b") '(1 font-lock-constant-face))

     (cons (concat "\\(" (mapconcat 'identity '("[,.]") "\\|") "\\)") '(1 font-lock-builtin-face))
     ))
  "Additional expressions to highlight in Merd mode.")
(put 'merd-mode 'font-lock-defaults '(merd-font-lock-keywords))

(make-variable-buffer-local 'me-indent-offset)


;; Constants

;; Regexp matching blank or comment lines.
(defconst me-blank-or-comment-re "[ \t]*\\($\\|#\\)")

;; Regexp matching clauses to be outdented one level.
(defconst me-outdent-re
  (concat "\\(" 
	  (mapconcat 'identity (append me-intdent-keywords-cont '(".*:=")) "\\|")
	  "\\)"))

;; Regexp matching keywords which typically close a block
(defconst me-block-closing-keywords-re
  (concat (mapconcat 'identity me-outdent-keywords "\\|") "\\>"))

(defconst me-statement-opens-block-re
  (concat ".*\\(" 
	  (mapconcat 'identity (cons "[+=->(]" me-intdent-keywords) "\\|")
	  "\\) *\n"))


;; Utilities

(defmacro me-safe (&rest body)
  ;; safely execute BODY, return nil if an error occurred
  (` (condition-case nil
	 (progn (,@ body))
       (error nil))))

(defsubst me-keep-region-active ()
  ;; Do whatever is necessary to keep the region active in XEmacs.
  ;; Ignore byte-compiler warnings you might see.  Also note that
  ;; FSF's Emacs 19 does it differently; its policy doesn't require us
  ;; to take explicit action.
  (and (boundp 'zmacs-region-stays)
       (setq zmacs-region-stays t)))

(defsubst me-point (position)
  ;; Returns the value of point at certain commonly referenced POSITIONs.
  ;; POSITION can be one of the following symbols:
  ;; 
  ;; bol  -- beginning of line
  ;; eol  -- end of line
  ;; bod  -- beginning of defun
  ;; boi  -- back to indentation
  ;; 
  ;; This function does not modify point or mark.
  (let ((here (point)))
    (cond
     ((eq position 'bol) (beginning-of-line))
     ((eq position 'eol) (end-of-line))
     ((eq position 'bob) (beginning-of-buffer))
     ((eq position 'eob) (end-of-buffer))
     ((eq position 'boi) (back-to-indentation))
     (t (error "unknown buffer position requested: %s" position))
     )
    (prog1
	(point)
      (goto-char here))))

(defun me-in-literal (&optional lim)
  ;; Determine if point is in a Merd literal, defined as a comment
  ;; or string.  This is the version used for non-XEmacs, which has a
  ;; nicer interface.
  ;;
  ;; WARNING: Watch out for infinite recursion.
  (let* (state (parse-partial-sexp lim (point)))
    (cond
     ((nth 3 state) 'string)
     ((nth 4 state) 'comment)
     (t nil))))

;; XEmacs has a built-in function that should make this much quicker.
;; In this case, lim is ignored
(defun me-fast-in-literal (&optional lim)
  ;; don't have to worry about context == 'block-comment
  (buffer-syntactic-context))

(if (fboundp 'buffer-syntactic-context)
    (defalias 'c-in-literal 'c-fast-in-literal))



;; Major mode boilerplate

;; define a mode-specific abbrev table for those who use such things
(defvar merd-mode-abbrev-table nil
  "Abbrev table in use in `merd-mode' buffers.")
(define-abbrev-table 'merd-mode-abbrev-table nil)

(defvar merd-mode-hook nil
  "*Hook called by `merd-mode'.")

(defvar me-mode-map ()
  "Keymap used in `merd-mode' buffers.")
(if me-mode-map
    nil
  (setq me-mode-map (make-sparse-keymap))

  (if (not (boundp 'delete-key-deletes-forward))
      (define-key me-mode-map "\177" 'me-electric-backspace)
    (define-key me-mode-map [delete]    'me-electric-delete)
    (define-key me-mode-map [backspace] 'me-electric-backspace))

  (define-key me-mode-map "\n"   'me-newline-and-indent)
  (define-key me-mode-map "\C-m" 'me-newline-and-indent)
  )

(defvar me-mode-syntax-table nil
  "Syntax table used in `merd-mode' buffers.")
(if me-mode-syntax-table
    nil
  (setq me-mode-syntax-table (make-syntax-table))
  (modify-syntax-entry ?\( "()" me-mode-syntax-table)
  (modify-syntax-entry ?\) ")(" me-mode-syntax-table)
  (modify-syntax-entry ?\[ "(]" me-mode-syntax-table)
  (modify-syntax-entry ?\] ")[" me-mode-syntax-table)
  (modify-syntax-entry ?\{ "(}" me-mode-syntax-table)
  (modify-syntax-entry ?\} "){" me-mode-syntax-table)
  (modify-syntax-entry ?\_ "w"  me-mode-syntax-table)
  (modify-syntax-entry ?\' "w"  me-mode-syntax-table)
  (modify-syntax-entry ?\! "w"  me-mode-syntax-table)
  (modify-syntax-entry ?\? "w"  me-mode-syntax-table)
  ;; double quote are string delimiters
  (modify-syntax-entry ?\" "\"" me-mode-syntax-table)

  ;; comment delimiters
  (modify-syntax-entry ?\# "<"  me-mode-syntax-table)
  (modify-syntax-entry ?\n ">"  me-mode-syntax-table)

  ;; Add operator symbols misassigned in the std table
  (modify-syntax-entry ?\$ "."  me-mode-syntax-table)
  (modify-syntax-entry ?\% "."  me-mode-syntax-table)
  (modify-syntax-entry ?\& "."  me-mode-syntax-table)
  (modify-syntax-entry ?\* "."  me-mode-syntax-table)
  (modify-syntax-entry ?\+ "."  me-mode-syntax-table)
  (modify-syntax-entry ?\- "."  me-mode-syntax-table)
  (modify-syntax-entry ?\/ "."  me-mode-syntax-table)
  (modify-syntax-entry ?\< "."  me-mode-syntax-table)
  (modify-syntax-entry ?\= "."  me-mode-syntax-table)
  (modify-syntax-entry ?\> "."  me-mode-syntax-table)
  (modify-syntax-entry ?\| "."  me-mode-syntax-table)

  )


;;;###autoload
(defun merd-mode ()
  "Major mode for editing Merd files"
  (interactive)
  ;; set up local variables
  (kill-all-local-variables)
  (make-local-variable 'font-lock-defaults)
  (make-local-variable 'paragraph-separate)
  (make-local-variable 'paragraph-start)
  (make-local-variable 'require-final-newline)
  (make-local-variable 'comment-start)
  (make-local-variable 'comment-end)
  (make-local-variable 'comment-start-skip)
  (make-local-variable 'comment-column)
  (make-local-variable 'indent-region-function)
  (make-local-variable 'indent-line-function)
  ;;
  (set-syntax-table me-mode-syntax-table)
  (setq major-mode             'merd-mode
	mode-name              "Merd"
	local-abbrev-table     merd-mode-abbrev-table
	font-lock-defaults     '(merd-font-lock-keywords)
	paragraph-separate     "^[ \t]*$"
	paragraph-start        "^[ \t]*$"
	require-final-newline  t
	comment-start          "# "
	comment-end            ""
	comment-start-skip     "# *"
	comment-column         40
	indent-region-function 'me-indent-region
	indent-line-function   'me-indent-line
	;; tell add-log.el how to find the current function/method/variable
	)
  (use-local-map me-mode-map)
  ;; Run the mode hook.
  (if merd-mode-hook (run-hooks 'merd-mode-hook))
)


;; electric characters
(defun me-outdent-p ()
  ;; returns non-nil if the current line should outdent one level
  (save-excursion
    (and (progn (back-to-indentation)
		(looking-at me-outdent-re))
	 (progn (forward-line -1)
		(beginning-of-line)
		(back-to-indentation)
		(while (or (looking-at me-blank-or-comment-re)
			   (bobp))
		  (backward-to-indentation 1))
		(not (looking-at me-statement-opens-block-re)))
	 )))
      



;; Electric deletion
(defun me-electric-backspace (arg)
  "Deletes preceding character or levels of indentation.
Deletion is performed by calling the function in `me-backspace-function'
with a single argument (the number of characters to delete).

If point is at the leftmost column, deletes the preceding newline.

Otherwise, if point is at the leftmost non-whitespace character of a
line that is neither a continuation line nor a non-indenting comment
line, or if point is at the end of a blank line, this command reduces
the indentation to match that of the line that opened the current
block of code.  The line that opened the block is displayed in the
echo area to help you keep track of where you are.  With numeric arg,
outdents that many blocks (but not past column zero).

Otherwise the preceding character is deleted, converting a tab to
spaces if needed so that only a single column position is deleted.
Numeric argument deletes that many preceding characters."
  (interactive "*p")
  (if (or (/= (current-indentation) (current-column)) (bolp))
      (funcall me-backspace-function arg)
    ;; else indent the same as the colon line that opened the block
    ;; force non-blank so me-goto-block-up doesn't ignore it
    (insert-char ?* 1)
    (backward-char)
    (let ((base-indent 0)		; indentation of base line
	  (base-text "")		; and text of base line
	  (base-found-p nil))
      (save-excursion
	(while (< 0 arg)
	  (condition-case nil		; in case no enclosing block
	      (progn
		(me-goto-block-up 'no-mark)
		(setq base-indent (current-indentation)
		      base-text   (me-suck-up-leading-text)
		      base-found-p t))
	    (error nil))
	  (setq arg (1- arg))))
      (delete-char 1)			; toss the dummy character
      (delete-horizontal-space)
      (indent-to base-indent)
      (if base-found-p
	  (message "Closes block: %s" base-text)))))


(defun me-electric-delete (arg)
  "Deletes preceding or following character or levels of whitespace.

The behavior of this function depends on the variable
`delete-key-deletes-forward'.  If this variable is nil (or does not
exist, as in older Emacsen), then this function behaves identical to
\\[c-electric-backspace].

If `delete-key-deletes-forward' is non-nil and is supported in your
Emacs, then deletion occurs in the forward direction, by calling the
function in `me-delete-function'."
  (interactive "*p")
  (if (and (boundp 'delete-key-deletes-forward)
	   delete-key-deletes-forward)
      (funcall me-delete-function arg)
    ;; else
    (me-electric-backspace arg)))

;; required for pending-del and delsel modes
(put 'me-electric-backspace 'delete-selection 'supersede) ;delsel
(put 'me-electric-backspace 'pending-delete   'supersede) ;pending-del
(put 'me-electric-delete    'delete-selection 'supersede) ;delsel
(put 'me-electric-delete    'pending-delete   'supersede) ;pending-del



(defun me-indent-line (&optional arg)
  "Fix the indentation of the current line according to Merd rules.
With \\[universal-argument], ignore outdenting rules for block
closing statements (e.g. return, raise, break, continue, pass)

This function is normally bound to `indent-line-function' so
\\[indent-for-tab-command] will call it."
  (interactive "P")
  (let* ((ci (current-indentation))
	 (move-to-indentation-p (<= (current-column) ci))
	 (need (me-compute-indentation (not arg))))
    ;; see if we need to outdent
    (if (me-outdent-p)
	(setq need (- need me-indent-offset)))
    (if (/= ci need)
	(save-excursion
	  (beginning-of-line)
	  (delete-horizontal-space)
	  (indent-to need)))
    (if move-to-indentation-p (back-to-indentation))))

(defun me-newline-and-indent ()
  "Strives to act like the Emacs `newline-and-indent'.
This is just `strives to' because correct indentation can't be computed
from scratch for Merd code.  In general, deletes the whitespace before
point, inserts a newline, and takes an educated guess as to how you want
the new line indented."
  (interactive)
  (let ((ci (current-indentation)))
    (if (< ci (current-column))		; if point beyond indentation
	(newline-and-indent)
      ;; else try to act like newline-and-indent "normally" acts
      (beginning-of-line)
      (insert-char ?\n 1)
      (move-to-column ci))))



(defun me-compute-indentation (honor-block-close-p)
  ;; implements all the rules for indentation computation.  when
  ;; honor-block-close-p is non-nil, statements such as return, raise,
  ;; break, continue, and pass force one level of outdenting.
  (save-excursion
    (back-to-indentation)
    (if (looking-at me-closing-aroundblock)
	(progn
	  (re-search-forward (concat me-closing-aroundblock "+"))
	  (backward-sexp)
	  (current-indentation))
      (forward-line -1)
      (while (looking-at "[ \t]*\n") (forward-line -1))
      (+ (current-indentation) 
	 (if (me-statement-opens-block-p)
	     me-indent-offset
	   (if (me-statement-closes-block-p)
	       (- me-indent-offset)
	     0)))
      )))

(defun me-indent-region (start end &optional indent-offset)
  "Reindent a region of Merd code.

The lines from the line containing the start of the current region up
to (but not including) the line containing the end of the region are
reindented.  If the first line of the region has a non-whitespace
character in the first column, the first line is left alone and the
rest of the region is reindented with respect to it.  Else the entire
region is reindented with respect to the (closest code or indenting
comment) statement immediately preceding the region.

This is useful when code blocks are moved or yanked, when enclosing
control structures are introduced or removed, or to reformat code
using a new value for the indentation offset.

If a numeric prefix argument is given, it will be used as the value of
the indentation offset.  Else the value of `me-indent-offset' will be
used.

Warning: The region must be consistently indented before this function
is called!  This function does not compute proper indentation from
scratch (that's impossible in Merd), it merely adjusts the existing
indentation to be correct in context.

Warning: This function really has no idea what to do with
non-indenting comment lines, and shifts them as if they were indenting
comment lines.  Fixing this appears to require telepathy.

Special cases: whitespace is deleted from blank lines; continuation
lines are shifted by the same amount their initial line was shifted,
in order to preserve their relative indentation with respect to their
initial line; and comment lines beginning in column 1 are ignored."
  (interactive "*r\nP")			; region; raw prefix arg
  (save-excursion
    (goto-char end)   (beginning-of-line) (setq end (point-marker))
    (goto-char start) (beginning-of-line)
    (let ((me-indent-offset (prefix-numeric-value
			     (or indent-offset me-indent-offset)))
	  (indents '(-1))		; stack of active indent levels
	  (target-column 0)		; column to which to indent
	  (base-shifted-by 0)		; amount last base line was shifted
	  (indent-base (if (looking-at "[ \t\n]")
			   (me-compute-indentation t)
			 0))
	  ci)
      (while (< (point) end)
	(setq ci (current-indentation))
	;; figure out appropriate target column
	(cond
	 ((or (eq (following-char) ?#)	; comment in column 1
	      (looking-at "[ \t]*$"))	; entirely blank
	  (setq target-column 0))	 
	 (t				; new base line
	  (if (> ci (car indents))	; going deeper; push it
	      (setq indents (cons ci indents))
	    ;; else we should have seen this indent before
	    (setq indents (memq ci indents)) ; pop deeper indents
	    (if (null indents)
		(error "Bad indentation in region, at line %d"
		       (save-restriction
			 (widen)
			 (1+ (count-lines 1 (point)))))))
	  (setq target-column (+ indent-base
				 (* me-indent-offset
				    (- (length indents) 2))))
	  (setq base-shifted-by (- target-column ci))))
	;; shift as needed
	(if (/= ci target-column)
	    (progn
	      (delete-horizontal-space)
	      (indent-to target-column)))
	(forward-line 1))))
  (set-marker end nil))

(defun me-comment-region (beg end &optional arg)
  "Like `comment-region' but uses double hash (`#') comment starter."
  (interactive "r\nP")
  (let ((comment-start me-block-comment-prefix))
    (comment-region beg end arg)))


;; Functions for moving point
(defun me-previous-statement (count)
  "Go to the start of previous Merd statement.
If the statement at point is the i'th Merd statement, goes to the
start of statement i-COUNT.  If there is no such statement, goes to the
first statement.  Returns count of statements left to move.
`Statements' do not include blank, comment, or continuation lines."
  (interactive "p")			; numeric prefix arg
  (if (< count 0) (me-next-statement (- count))
    (beginning-of-line)
    (let (start)
      (while (and
	      (setq start (point))	; always true -- side effect
	      (> count 0)
	      (zerop (forward-line -1))
	      (me-goto-statement-at-or-above))
	(setq count (1- count)))
      (if (> count 0) (goto-char start)))
    count))

(defun me-next-statement (count)
  "Go to the start of next Merd statement.
If the statement at point is the i'th Merd statement, goes to the
start of statement i+COUNT.  If there is no such statement, goes to the
last statement.  Returns count of statements left to move.  `Statements'
do not include blank, comment, or continuation lines."
  (interactive "p")			; numeric prefix arg
  (if (< count 0) (me-previous-statement (- count))
    (beginning-of-line)
    (let (start)
      (while (and
	      (setq start (point))	; always true -- side effect
	      (> count 0)
	      (me-goto-statement-below))
	(setq count (1- count)))
      (if (> count 0) (goto-char start)))
    count))

(defun me-goto-block-up (&optional nomark)
  "Move up to start of current block.
Go to the statement that starts the smallest enclosing block; roughly
speaking, this will be the closest preceding statement that ends with a
colon and is indented less than the statement you started on.  If
successful, also sets the mark to the starting point.

`\\[me-mark-block]' can be used afterward to mark the whole code
block, if desired.

If called from a program, the mark will not be set if optional argument
NOMARK is not nil."
  (interactive)
  (let ((start (point))
	(found nil)
	(initial-indent (current-indentation)))
    (back-to-indentation)
    (if (looking-at me-closing-aroundblock)
	(progn 
	  (forward-char 1)
	  (backward-sexp))
      (while (not (or found (bobp)))
	(forward-line -1)
	(setq found (< (current-indentation) initial-indent)))
      (if found
	  (progn
	    (or nomark (push-mark start))
	    (back-to-indentation))
	(goto-char start)
	(error "Enclosing block not found")))))



;; TODO
;; Helper functions
(defvar me-parse-state-re
  (concat
   "^[ \t]*\\(if\\|elif\\|elsee\\|while\\|def\\|class\\)\\>"
   "\\|"
   "^[^ #\t\n]"))

;; TODO
;; returns the parse state at point (see parse-partial-sexp docs)
(defun me-parse-state ()
  (save-excursion
    (let ((here (point))
	  pps done)
      (while (not done)
	;; back up to the first preceding line (if any; else start of
	;; buffer) that begins with a popular Merd keyword, or a
	;; non- whitespace and non-comment character.  These are good
	;; places to start parsing to see whether where we started is
	;; at a non-zero nesting level.  It may be slow for people who
	;; write huge code blocks or huge lists ... tough beans.
	(re-search-backward me-parse-state-re nil 'move)
	(beginning-of-line)
	;; In XEmacs, we have a much better way to test for whether
	;; we're in a triple-quoted string or not.  Emacs does not
	;; have this built-in function, which is it's loss because
	;; without scanning from the beginning of the buffer, there's
	;; no accurate way to determine this otherwise.
	(if (not (fboundp 'buffer-syntactic-context))
	    ;; Emacs
	    (progn
	      (save-excursion (setq pps (parse-partial-sexp (point) here)))
	      ;; make sure we don't land inside a triple-quoted string
	      (setq done (or (not (nth 3 pps))
			     (bobp))))
	  ;; XEmacs
	  (setq done (or (not (buffer-syntactic-context))
			 (bobp)))
	  (when done
	    (setq pps (parse-partial-sexp (point) here)))
	  ))
      pps)))

;; t iff statement opens a block == iff it ends with a colon that's
;; not in a comment.  point should be at the start of a statement
(defun me-statement-opens-block-p ()
  (save-excursion
    (back-to-indentation)
    (looking-at me-statement-opens-block-re)))

;; true iff the current statement `closes' a block
(defun me-statement-closes-block-p ()
  (save-excursion
    (back-to-indentation)
    (looking-at me-block-closing-keywords-re)))

;; go to start of first statement (not blank or comment or
;; continuation line) at or preceding point.  returns t if there is
;; one, else nil
(defun me-goto-statement-at-or-above ()
  (beginning-of-line)
  (if (looking-at me-blank-or-comment-re)
      ;; skip back over blank & comment lines
      ;; note:  will skip a blank or comment line that happens to be
      ;; a continuation line too
      (if (re-search-backward "^[ \t]*[^ \t#\n]" nil t)
	  (progn (beginning-of-line) t)
	nil)
    t))

;; go to start of first statement (not blank or comment or
;; continuation line) following the statement containing point returns
;; t if there is one, else nil
(defun me-goto-statement-below ()
  (beginning-of-line)
  (let ((start (point)))
    (forward-line 1)
    (while (and
	    (looking-at me-blank-or-comment-re)
	    (not (eobp)))
      (forward-line 1))
    (if (eobp)
	(progn (goto-char start) nil)
      t)))

;; return string in buffer from start of indentation to end of line
(defun me-suck-up-leading-text ()
  (save-excursion
    (back-to-indentation)
    (concat
     (if (bolp) "" "...")
     (buffer-substring (point) (progn (end-of-line) (point))))))


(provide 'merd-mode)
;;; merd-mode.el ends here

